diff options
Diffstat (limited to 'llvm/lib/Support')
70 files changed, 3132 insertions, 1999 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index eae4fdb6c3d0..4a73739b5282 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -14,8 +14,10 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi namespace llvm { - // How the nonfinite values Inf and NaN are represented. - enum class fltNonfiniteBehavior { - // Represents standard IEEE 754 behavior. A value is nonfinite if the - // exponent field is all 1s. In such cases, a value is Inf if the - // significand bits are all zero, and NaN otherwise - IEEE754, - - // Only the Float8E5M2 has this behavior. There is no Inf representation. A - // value is NaN if the exponent field and the mantissa field are all 1s. - // This behavior matches the FP8 E4M3 type described in - // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs - // as non-signalling, although the paper does not state whether the NaN - // values are signalling or not. - NanOnly, - }; +// How the nonfinite values Inf and NaN are represented. +enum class fltNonfiniteBehavior { + // Represents standard IEEE 754 behavior. A value is nonfinite if the + // exponent field is all 1s. In such cases, a value is Inf if the + // significand bits are all zero, and NaN otherwise + IEEE754, + + // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, + // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no + // representation for Inf, and operations that would ordinarily produce Inf + // produce NaN instead. 
+ // The details of the NaN representation(s) in this form are determined by the + // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available + // encodings do not distinguish between signalling and quiet NaN. + NanOnly, +}; - /* Represents floating point arithmetic semantics. */ - struct fltSemantics { - /* The largest E such that 2^E is representable; this matches the - definition of IEEE 754. */ - APFloatBase::ExponentType maxExponent; +// How NaN values are represented. This is curently only used in combination +// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE +// while having IEEE non-finite behavior is liable to lead to unexpected +// results. +enum class fltNanEncoding { + // Represents the standard IEEE behavior where a value is NaN if its + // exponent is all 1s and the significand is non-zero. + IEEE, + + // Represents the behavior in the Float8E4M3 floating point type where NaN is + // represented by having the exponent and mantissa set to all 1s. + // This behavior matches the FP8 E4M3 type described in + // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs + // as non-signalling, although the paper does not state whether the NaN + // values are signalling or not. + AllOnes, + + // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types + // where NaN is represented by a sign bit of 1 and all 0s in the exponent + // and mantissa (i.e. the negative zero encoding in a IEEE float). Since + // there is only one NaN value, it is treated as quiet NaN. This matches the + // behavior described in https://arxiv.org/abs/2206.02915 . + NegativeZero, +}; - /* The smallest E such that 2^E is a normalized number; this - matches the definition of IEEE 754. */ - APFloatBase::ExponentType minExponent; +/* Represents floating point arithmetic semantics. */ +struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. 
*/ + APFloatBase::ExponentType maxExponent; - /* Number of bits in the significand. This includes the integer - bit. */ - unsigned int precision; + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + APFloatBase::ExponentType minExponent; - /* Number of bits actually used in the semantics. */ - unsigned int sizeInBits; + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; - fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; + /* Number of bits actually used in the semantics. */ + unsigned int sizeInBits; - // Returns true if any number described by this semantics can be precisely - // represented by the specified semantics. Does not take into account - // the value of fltNonfiniteBehavior. - bool isRepresentableBy(const fltSemantics &S) const { - return maxExponent <= S.maxExponent && minExponent >= S.minExponent && - precision <= S.precision; - } - }; + fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; - static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; - static const fltSemantics semBFloat = {127, -126, 8, 16}; - static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; - static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; - static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; - static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; - static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8, - fltNonfiniteBehavior::NanOnly}; - static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; - static const fltSemantics semBogus = {0, 0, 0, 0}; - - /* The IBM double-double semantics. Such a number consists of a pair of IEEE - 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, - (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. - Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent - to each other, and two 11-bit exponents. 
- - Note: we need to make the value different from semBogus as otherwise - an unsafe optimization may collapse both values to a single address, - and we heavily rely on them having distinct addresses. */ - static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; - - /* These are legacy semantics for the fallback, inaccrurate implementation of - IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the - operation. It's equivalent to having an IEEE number with consecutive 106 - bits of mantissa and 11 bits of exponent. - - It's not equivalent to IBM double-double. For example, a legit IBM - double-double, 1 + epsilon: - - 1 + epsilon = 1 + (1 >> 1076) - - is not representable by a consecutive 106 bits of mantissa. - - Currently, these semantics are used in the following way: - - semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> - (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> - semPPCDoubleDoubleLegacy -> IEEE operations - - We use bitcastToAPInt() to get the bit representation (in APInt) of the - underlying IEEEdouble, then use the APInt constructor to construct the - legacy IEEE float. - - TODO: Implement all operations in semPPCDoubleDouble, and delete these - semantics. 
*/ - static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, - 53 + 53, 128}; - - const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { - switch (S) { - case S_IEEEhalf: - return IEEEhalf(); - case S_BFloat: - return BFloat(); - case S_IEEEsingle: - return IEEEsingle(); - case S_IEEEdouble: - return IEEEdouble(); - case S_IEEEquad: - return IEEEquad(); - case S_PPCDoubleDouble: - return PPCDoubleDouble(); - case S_Float8E5M2: - return Float8E5M2(); - case S_Float8E4M3FN: - return Float8E4M3FN(); - case S_x87DoubleExtended: - return x87DoubleExtended(); - } - llvm_unreachable("Unrecognised floating semantics"); - } - - APFloatBase::Semantics - APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { - if (&Sem == &llvm::APFloat::IEEEhalf()) - return S_IEEEhalf; - else if (&Sem == &llvm::APFloat::BFloat()) - return S_BFloat; - else if (&Sem == &llvm::APFloat::IEEEsingle()) - return S_IEEEsingle; - else if (&Sem == &llvm::APFloat::IEEEdouble()) - return S_IEEEdouble; - else if (&Sem == &llvm::APFloat::IEEEquad()) - return S_IEEEquad; - else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) - return S_PPCDoubleDouble; - else if (&Sem == &llvm::APFloat::Float8E5M2()) - return S_Float8E5M2; - else if (&Sem == &llvm::APFloat::Float8E4M3FN()) - return S_Float8E4M3FN; - else if (&Sem == &llvm::APFloat::x87DoubleExtended()) - return S_x87DoubleExtended; - else - llvm_unreachable("Unknown floating semantics"); + fltNanEncoding nanEncoding = fltNanEncoding::IEEE; + // Returns true if any number described by this semantics can be precisely + // represented by the specified semantics. Does not take into account + // the value of fltNonfiniteBehavior. 
+ bool isRepresentableBy(const fltSemantics &S) const { + return maxExponent <= S.maxExponent && minExponent >= S.minExponent && + precision <= S.precision; } +}; - const fltSemantics &APFloatBase::IEEEhalf() { - return semIEEEhalf; - } - const fltSemantics &APFloatBase::BFloat() { - return semBFloat; - } - const fltSemantics &APFloatBase::IEEEsingle() { - return semIEEEsingle; - } - const fltSemantics &APFloatBase::IEEEdouble() { - return semIEEEdouble; - } - const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } - const fltSemantics &APFloatBase::PPCDoubleDouble() { - return semPPCDoubleDouble; - } - const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } - const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } - const fltSemantics &APFloatBase::x87DoubleExtended() { - return semX87DoubleExtended; - } - const fltSemantics &APFloatBase::Bogus() { return semBogus; } +static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; +static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; +static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; +static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; +static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; +static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; +static constexpr fltSemantics semFloat8E5M2FNUZ = { + 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3FN = { + 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; +static constexpr fltSemantics semFloat8E4M3FNUZ = { + 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3B11FNUZ = { + 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; +static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 
+static constexpr fltSemantics semBogus = {0, 0, 0, 0}; + +/* The IBM double-double semantics. Such a number consists of a pair of IEEE + 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, + (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. + Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent + to each other, and two 11-bit exponents. + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ +static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; + +/* These are legacy semantics for the fallback, inaccrurate implementation of + IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the + operation. It's equivalent to having an IEEE number with consecutive 106 + bits of mantissa and 11 bits of exponent. + + It's not equivalent to IBM double-double. For example, a legit IBM + double-double, 1 + epsilon: + + 1 + epsilon = 1 + (1 >> 1076) + + is not representable by a consecutive 106 bits of mantissa. + + Currently, these semantics are used in the following way: + + semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> + (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> + semPPCDoubleDoubleLegacy -> IEEE operations + + We use bitcastToAPInt() to get the bit representation (in APInt) of the + underlying IEEEdouble, then use the APInt constructor to construct the + legacy IEEE float. + + TODO: Implement all operations in semPPCDoubleDouble, and delete these + semantics. 
*/ +static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, + 53 + 53, 128}; + +const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { + switch (S) { + case S_IEEEhalf: + return IEEEhalf(); + case S_BFloat: + return BFloat(); + case S_IEEEsingle: + return IEEEsingle(); + case S_IEEEdouble: + return IEEEdouble(); + case S_IEEEquad: + return IEEEquad(); + case S_PPCDoubleDouble: + return PPCDoubleDouble(); + case S_Float8E5M2: + return Float8E5M2(); + case S_Float8E5M2FNUZ: + return Float8E5M2FNUZ(); + case S_Float8E4M3FN: + return Float8E4M3FN(); + case S_Float8E4M3FNUZ: + return Float8E4M3FNUZ(); + case S_Float8E4M3B11FNUZ: + return Float8E4M3B11FNUZ(); + case S_FloatTF32: + return FloatTF32(); + case S_x87DoubleExtended: + return x87DoubleExtended(); + } + llvm_unreachable("Unrecognised floating semantics"); +} + +APFloatBase::Semantics +APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { + if (&Sem == &llvm::APFloat::IEEEhalf()) + return S_IEEEhalf; + else if (&Sem == &llvm::APFloat::BFloat()) + return S_BFloat; + else if (&Sem == &llvm::APFloat::IEEEsingle()) + return S_IEEEsingle; + else if (&Sem == &llvm::APFloat::IEEEdouble()) + return S_IEEEdouble; + else if (&Sem == &llvm::APFloat::IEEEquad()) + return S_IEEEquad; + else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) + return S_PPCDoubleDouble; + else if (&Sem == &llvm::APFloat::Float8E5M2()) + return S_Float8E5M2; + else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) + return S_Float8E5M2FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3FN()) + return S_Float8E4M3FN; + else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) + return S_Float8E4M3FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) + return S_Float8E4M3B11FNUZ; + else if (&Sem == &llvm::APFloat::FloatTF32()) + return S_FloatTF32; + else if (&Sem == &llvm::APFloat::x87DoubleExtended()) + return S_x87DoubleExtended; + else + llvm_unreachable("Unknown floating semantics"); +} + +const 
fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } +const fltSemantics &APFloatBase::BFloat() { return semBFloat; } +const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } +const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } +const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } +const fltSemantics &APFloatBase::PPCDoubleDouble() { + return semPPCDoubleDouble; +} +const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } +const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } +const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { + return semFloat8E4M3B11FNUZ; +} +const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } +const fltSemantics &APFloatBase::x87DoubleExtended() { + return semX87DoubleExtended; +} +const fltSemantics &APFloatBase::Bogus() { return semBogus; } + +constexpr RoundingMode APFloatBase::rmNearestTiesToEven; +constexpr RoundingMode APFloatBase::rmTowardPositive; +constexpr RoundingMode APFloatBase::rmTowardNegative; +constexpr RoundingMode APFloatBase::rmTowardZero; +constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + +/* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. 
*/ +const unsigned int maxExponent = 16383; +const unsigned int maxPrecision = 113; +const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; +const unsigned int maxPowerOfFiveParts = + 2 + + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); + +unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { + return semantics.precision; +} +APFloatBase::ExponentType +APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { + return semantics.maxExponent; +} +APFloatBase::ExponentType +APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { + return semantics.minExponent; +} +unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { + return semantics.sizeInBits; +} +unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, + bool isSigned) { + // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need + // at least one more bit than the MaxExponent to hold the max FP value. + unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; + // Extra sign bit needed. + if (isSigned) + ++MinBitWidth; + return MinBitWidth; +} + +bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, + const fltSemantics &Dst) { + // Exponent range must be larger. + if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) + return false; - constexpr RoundingMode APFloatBase::rmNearestTiesToEven; - constexpr RoundingMode APFloatBase::rmTowardPositive; - constexpr RoundingMode APFloatBase::rmTowardNegative; - constexpr RoundingMode APFloatBase::rmTowardZero; - constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + // If the mantissa is long enough, the result value could still be denormal + // with a larger exponent range. + // + // FIXME: This condition is probably not accurate but also shouldn't be a + // practical concern with existing types. 
+ return Dst.precision >= Src.precision; +} - /* A tight upper bound on number of parts required to hold the value - pow(5, power) is +unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} - power * 815 / (351 * integerPartWidth) + 1 +static constexpr APFloatBase::ExponentType +exponentZero(const fltSemantics &semantics) { + return semantics.minExponent - 1; +} - However, whilst the result may require only this many parts, - because we are multiplying two values to get it, the - multiplication may require an extra part with the excess part - being zero (consider the trivial case of 1 * 1, tcFullMultiply - requires two parts to hold the single-part result). So we add an - extra one to guarantee enough space whilst multiplying. */ - const unsigned int maxExponent = 16383; - const unsigned int maxPrecision = 113; - const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; - const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); +static constexpr APFloatBase::ExponentType +exponentInf(const fltSemantics &semantics) { + return semantics.maxExponent + 1; +} - unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { - return semantics.precision; - } - APFloatBase::ExponentType - APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { +static constexpr APFloatBase::ExponentType +exponentNaN(const fltSemantics &semantics) { + if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics.nanEncoding == fltNanEncoding::NegativeZero) + return exponentZero(semantics); return semantics.maxExponent; } - APFloatBase::ExponentType - APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { - return semantics.minExponent; - } - unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { - return semantics.sizeInBits; - } - - unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) 
{ - return Sem.sizeInBits; + return semantics.maxExponent + 1; } /* A bunch of private, handy routines. */ @@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) { return make_error<StringError>(Err, inconvertibleErrorCode()); } -static inline unsigned int -partCountForBits(unsigned int bits) -{ +static constexpr inline unsigned int partCountForBits(unsigned int bits) { return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; } @@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if (hexDigit == -1U) + if (hexDigit == UINT_MAX) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts, lsb = APInt::tcLSB(parts, partCount); - /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ if (bits <= lsb) return lfExactlyZero; if (bits == lsb + 1) @@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { APInt fill_storage; if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { - // The only NaN representation is where the mantissa is all 1s, which is - // non-signalling. + // Finite-only types do not distinguish signalling and quiet NaN, so + // make them all signalling. SNaN = false; - fill_storage = APInt::getAllOnes(semantics->precision - 1); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + sign = true; + fill_storage = APInt::getZero(semantics->precision - 1); + } else { + fill_storage = APInt::getAllOnes(semantics->precision - 1); + } fill = &fill_storage; } @@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { // conventionally, this is the next bit down from the QNaN bit. 
if (APInt::tcIsZero(significand, numParts)) APInt::tcSetBit(significand, QNaNBit - 1); + } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // The only NaN is a quiet NaN, and it has no bits sets in the significand. + // Do nothing. } else { // We always have to set the QNaN bit to make it a QNaN. APInt::tcSetBit(significand, QNaNBit); @@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const { } bool IEEEFloat::isLargest() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) { // The largest number by magnitude in our format will be the floating point // number with maximum exponent and with significand that is all ones except // the LSB. @@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { exponent = semantics->maxExponent; tcSetLeastSignificantBits(significandParts(), partCount(), semantics->precision); - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) APInt::tcClearBit(significandParts(), 0); return opInexact; @@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, } } + // The all-ones values is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); @@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, underflow for exact results. */ if (lost_fraction == lfExactlyZero) { /* Canonicalize zeroes. 
*/ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return opOK; } @@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, /* Renormalize by incrementing the exponent and shifting our significand right one. However if we already have the maximum exponent we overflow to infinity. */ - if (exponent == semantics->maxExponent) { - category = fcInfinity; - - return (opStatus) (opOverflow | opInexact); - } + if (exponent == semantics->maxExponent) + // Invoke overflow handling with a rounding mode that will guarantee + // that the result gets turned into the correct infinity representation. + // This is needed instead of just setting the category to infinity to + // account for 8-bit floating point types that have no inf, only NaN. + return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); shiftSignificandRight(1); return opInexact; } + // The all-ones values is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); } @@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, assert(omsb < semantics->precision); /* Canonicalize zeroes. */ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } /* The fcZero case is a denormal that underflowed to zero. */ return (opStatus) (opUnderflow | opInexact); @@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { /* Change sign. */ void IEEEFloat::changeSign() { + // With NaN-as-negative-zero, neither NaN or negative zero can change + // their signs. 
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero && + (isZero() || isNaN())) + return; /* Look mummy, this one's easy. */ sign = !sign; } @@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, if (category == fcZero) { if (rhs.category != fcZero || (sign == rhs.sign) == subtract) sign = (rounding_mode == rmTowardNegative); + // NaN-in-negative-zero means zeros need to be normalized to +0. + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; } return fs; @@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, sign ^= rhs.sign; fs = multiplySpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = multiplySignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, sign ^= rhs.sign; fs = divideSpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { } } - if (isZero()) + if (isZero()) { sign = origSign; // IEEE754 requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + // But some 8-bit floats only have positive 0. 
+ sign = false; + } + else sign ^= origSign; return fs; @@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK); } - if (isZero()) + if (isZero()) { sign = origSign; // fmod requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return fs; } @@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) + if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { sign = (rounding_mode == rmTowardNegative); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } } else { fs = multiplySpecials(multiplicand); @@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, return is_signaling ? opInvalidOp : opOK; } + // If NaN is negative zero, we need to create a new NaN to avoid converting + // NaN to -Inf. + if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && + semantics->nanEncoding != fltNanEncoding::NegativeZero) + makeNaN(false, false); + *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; // For x87 extended precision, we want to make a NaN, not a special NaN if @@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, makeNaN(false, sign); *losesInfo = true; fs = opInexact; + } else if (category == fcZero && + semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Negative zero loses info, but positive zero doesn't. + *losesInfo = + fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; + fs = *losesInfo ? 
opInexact : opOK; + // NaN is negative zero means -0 -> +0, which can lose information + sign = false; } else { *losesInfo = false; fs = opOK; @@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s, } hex_value = hexDigitValue(*p); - if (hex_value == -1U) + if (hex_value == UINT_MAX) break; p++; @@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; - /* Check whether the normalized exponent is high enough to overflow - max during the log-rebasing in the max-exponent check below. */ + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { fs = handleOverflow(rounding_mode); @@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { return APInt(128, words); } -APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); - assert(partCount()==2); +template <const fltSemantics &S> +APInt IEEEFloat::convertIEEEFloatToAPInt() const { + assert(semantics == &S); + + constexpr int bias = -(S.minExponent - 1); + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; + constexpr integerPart integer_bit = + integerPart{1} << (trailing_significand_bits % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; - uint64_t myexponent, mysignificand, mysignificand2; + uint64_t myexponent; + 
std::array<integerPart, partCountForBits(trailing_significand_bits)> + mysignificand; if (isFiniteNonZero()) { - myexponent = exponent+16383; //bias - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = mysignificand2 = 0; - } else if (category==fcInfinity) { - myexponent = 0x7fff; - mysignificand = mysignificand2 = 0; + myexponent = exponent + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + if (myexponent == 1 && + !(significandParts()[integer_bit_part] & integer_bit)) + myexponent = 0; // denormal + } else if (category == fcZero) { + myexponent = ::exponentZero(S) + bias; + mysignificand.fill(0); + } else if (category == fcInfinity) { + if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + llvm_unreachable("semantics don't support inf!"); + } + myexponent = ::exponentInf(S) + bias; + mysignificand.fill(0); } else { assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7fff; - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - } - - uint64_t words[2]; - words[0] = mysignificand; - words[1] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7fff) << 48) | - (mysignificand2 & 0xffffffffffffLL); + myexponent = ::exponentNaN(S) + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + } + std::array<uint64_t, (S.sizeInBits + 63) / 64> words; + auto words_iter = + std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); + if constexpr (significand_mask != 0) { + // Clear the integer bit. 
+ words[mysignificand.size() - 1] &= significand_mask; + } + std::fill(words_iter, words.end(), uint64_t{0}); + constexpr size_t last_word = words.size() - 1; + uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) + << ((S.sizeInBits - 1) % 64); + words[last_word] |= shifted_sign; + uint64_t shifted_exponent = (myexponent & exponent_mask) + << (trailing_significand_bits % 64); + words[last_word] |= shifted_exponent; + if constexpr (last_word == 0) { + return APInt(S.sizeInBits, words[0]); + } + return APInt(S.sizeInBits, words); +} - return APInt(128, words); +APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { + assert(partCount() == 2); + return convertIEEEFloatToAPInt<semIEEEquad>(); } APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); assert(partCount()==1); - - uint64_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+1023; //bias - mysignificand = *significandParts(); - if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x7ff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7ff; - mysignificand = *significandParts(); - } - - return APInt(64, ((((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL)))); + return convertIEEEFloatToAPInt<semIEEEdouble>(); } APInt IEEEFloat::convertFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+127; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x800000)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand 
= 0; - } else if (category==fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | - (mysignificand & 0x7fffff))); + return convertIEEEFloatToAPInt<semIEEEsingle>(); } APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semBFloat); assert(partCount() == 1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 127; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x80)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | - (mysignificand & 0x7f))); + return convertIEEEFloatToAPInt<semBFloat>(); } APInt IEEEFloat::convertHalfAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+15; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x400)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | - (mysignificand & 0x3ff))); + return 
convertIEEEFloatToAPInt<semIEEEhalf>(); } APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2>(); +} - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 15; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x4)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) | - (mysignificand & 0x3))); +APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); } APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); +} - uint32_t myexponent, mysignificand; +APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); +} - if (isFiniteNonZero()) { - myexponent = exponent + 7; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x8)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xf; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xf; - mysignificand = (uint32_t)*significandParts(); - } +APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + 
return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); +} - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) | - (mysignificand & 0x7))); +APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloatTF32>(); } // This function creates an APInt that is just a bit map of the floating @@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) return convertFloat8E5M2APFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) + return convertFloat8E5M2FNUZAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) return convertFloat8E4M3FNAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) + return convertFloat8E4M3FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) + return convertFloat8E4M3B11FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloatTF32) + return convertFloatTF32APFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { } } -void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { - uint64_t i1 = api.getRawData()[0]; - uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i2 >> 48) & 0x7fff; - uint64_t mysignificand = i1; - uint64_t mysignificand2 = i2 & 0xffffffffffffLL; +template <const fltSemantics &S> +void IEEEFloat::initFromIEEEAPInt(const APInt &api) { + assert(api.getBitWidth() == S.sizeInBits); + constexpr integerPart integer_bit = integerPart{1} + << ((S.precision - 1) % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr unsigned int 
stored_significand_parts = + partCountForBits(trailing_significand_bits); + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; + constexpr int bias = -(S.minExponent - 1); - initialize(&semIEEEquad); - assert(partCount()==2); - - sign = static_cast<unsigned int>(i2>>63); - if (myexponent==0 && - (mysignificand==0 && mysignificand2==0)) { - makeZero(sign); - } else if (myexponent==0x7fff && - (mysignificand==0 && mysignificand2==0)) { - makeInf(sign); - } else if (myexponent==0x7fff && - (mysignificand!=0 || mysignificand2 !=0)) { - category = fcNaN; - exponent = exponentNaN(); - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - } else { - category = fcNormal; - exponent = myexponent - 16383; - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - if (myexponent==0) // denormal - exponent = -16382; - else - significandParts()[1] |= 0x1000000000000LL; // integer bit + // Copy the bits of the significand. We need to clear out the exponent and + // sign bit in the last word. + std::array<integerPart, stored_significand_parts> mysignificand; + std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); + if constexpr (significand_mask != 0) { + mysignificand[mysignificand.size() - 1] &= significand_mask; } -} -void IEEEFloat::initFromDoubleAPInt(const APInt &api) { - uint64_t i = *api.getRawData(); - uint64_t myexponent = (i >> 52) & 0x7ff; - uint64_t mysignificand = i & 0xfffffffffffffLL; + // We assume the last word holds the sign bit, the exponent, and potentially + // some of the trailing significand field. 
+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; + uint64_t myexponent = + (last_word >> (trailing_significand_bits % 64)) & exponent_mask; - initialize(&semIEEEdouble); - assert(partCount()==1); + initialize(&S); + assert(partCount() == mysignificand.size()); - sign = static_cast<unsigned int>(i>>63); - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x7ff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x7ff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 1023; - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -1022; - else - *significandParts() |= 0x10000000000000LL; // integer bit - } -} + sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); -void IEEEFloat::initFromFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 23) & 0xff; - uint32_t mysignificand = i & 0x7fffff; + bool all_zero_significand = + llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); - initialize(&semIEEEsingle); - assert(partCount()==1); + bool is_zero = myexponent == 0 && all_zero_significand; - sign = i >> 31; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0xff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0xff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -126; - else - *significandParts() |= 0x800000; // integer bit + if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { + if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { + makeInf(sign); + return; + } } -} -void 
IEEEFloat::initFromBFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 7) & 0xff; - uint32_t mysignificand = i & 0x7f; + bool is_nan = false; - initialize(&semBFloat); - assert(partCount() == 1); + if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { + is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { + bool all_ones_significand = + std::all_of(mysignificand.begin(), mysignificand.end() - 1, + [](integerPart bits) { return bits == ~integerPart{0}; }) && + (!significand_mask || + mysignificand[mysignificand.size() - 1] == significand_mask); + is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { + is_nan = is_zero && sign; + } - sign = i >> 15; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xff && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0xff && mysignificand != 0) { + if (is_nan) { category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -126; - else - *significandParts() |= 0x80; // integer bit + exponent = ::exponentNaN(S); + std::copy_n(mysignificand.begin(), mysignificand.size(), + significandParts()); + return; + } + + if (is_zero) { + makeZero(sign); + return; } + + category = fcNormal; + exponent = myexponent - bias; + std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); + if (myexponent == 0) // denormal + exponent = S.minExponent; + else + significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit } -void IEEEFloat::initFromHalfAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 10) & 
0x1f; - uint32_t mysignificand = i & 0x3ff; +void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEquad>(api); +} - initialize(&semIEEEhalf); - assert(partCount()==1); +void IEEEFloat::initFromDoubleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEdouble>(api); +} - sign = i >> 15; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x1f && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x1f && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -14; - else - *significandParts() |= 0x400; // integer bit - } +void IEEEFloat::initFromFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEsingle>(api); } -void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 2) & 0x1f; - uint32_t mysignificand = i & 0x3; +void IEEEFloat::initFromBFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semBFloat>(api); +} - initialize(&semFloat8E5M2); - assert(partCount() == 1); +void IEEEFloat::initFromHalfAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEhalf>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0x1f && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0x1f && mysignificand != 0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -14; - else - *significandParts() |= 0x4; // integer bit - } +void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2>(api); +} + +void 
IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); } void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 3) & 0xf; - uint32_t mysignificand = i & 0x7; + initFromIEEEAPInt<semFloat8E4M3FN>(api); +} - initialize(&semFloat8E4M3FN); - assert(partCount() == 1); +void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xf && mysignificand == 7) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 7; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -6; - else - *significandParts() |= 0x8; // integer bit - } +void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); +} + +void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { + initFromIEEEAPInt<semFloatTF32>(api); } /// Treat api as containing the bits of a floating point number. @@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromPPCDoubleDoubleAPInt(api); if (Sem == &semFloat8E5M2) return initFromFloat8E5M2APInt(api); + if (Sem == &semFloat8E5M2FNUZ) + return initFromFloat8E5M2FNUZAPInt(api); if (Sem == &semFloat8E4M3FN) return initFromFloat8E4M3FNAPInt(api); + if (Sem == &semFloat8E4M3FNUZ) + return initFromFloat8E4M3FNUZAPInt(api); + if (Sem == &semFloat8E4M3B11FNUZ) + return initFromFloat8E4M3B11FNUZAPInt(api); + if (Sem == &semFloatTF32) + return initFromFloatTF32APInt(api); llvm_unreachable(nullptr); } @@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) { ? 
(~integerPart(0) >> NumUnusedHighBits) : 0; - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) significand[0] &= ~integerPart(1); } @@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, } // Ignore trailing binary zeros. - int trailingZeros = significand.countTrailingZeros(); + int trailingZeros = significand.countr_zero(); exp += trailingZeros; significand.lshrInPlace(trailingZeros); @@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { APInt::tcSet(significandParts(), 0, partCount()); category = fcZero; exponent = 0; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; break; } @@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { } APFloatBase::ExponentType IEEEFloat::exponentNaN() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) - return semantics->maxExponent; - return semantics->maxExponent + 1; + return ::exponentNaN(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentInf() const { - return semantics->maxExponent + 1; + return ::exponentInf(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentZero() const { - return semantics->minExponent - 1; + return ::exponentZero(*semantics); } void IEEEFloat::makeInf(bool Negative) { @@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) { void IEEEFloat::makeZero(bool Negative) { category = fcZero; sign = Negative; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Merge negative zero to positive because 0b10000...000 is used for NaN + sign = false; + } exponent = exponentZero(); APInt::tcSet(significandParts(), 0, partCount()); } @@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; // Clamp 
to one past the range ends to let normalize handle overlflow. - X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); + X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); X.normalize(RoundingMode, lfExactlyZero); if (X.isNaN()) X.makeQuiet(); @@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S) consumeError(StatusOrErr.takeError()); } +FPClassTest APFloat::classify() const { + if (isZero()) + return isNegative() ? fcNegZero : fcPosZero; + if (isNormal()) + return isNegative() ? fcNegNormal : fcPosNormal; + if (isDenormal()) + return isNegative() ? fcNegSubnormal : fcPosSubnormal; + if (isInfinity()) + return isNegative() ? fcNegInf : fcPosInf; + assert(isNaN() && "Other class of FP constant"); + return isSignaling() ? fcSNan : fcQNan; +} + APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo) { if (&getSemantics() == &ToSemantics) { diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index afe7478a8b2a..05b1526da95f 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -68,7 +69,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { if (r < radix) return r; - return -1U; + return UINT_MAX; } @@ -164,6 +165,14 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(U.pVal[i]); } +bool APInt::isAligned(Align A) const { + if (isZero()) + return true; + const unsigned TrailingZeroes = countr_zero(); + const unsigned MinimumTrailingZeroes = Log2(A); + return TrailingZeroes >= MinimumTrailingZeroes; +} + /// Prefix increment operator. Increments the APInt by one. 
APInt& APInt::operator++() { if (isSingleWord()) @@ -479,7 +488,6 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const { - assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && "Illegal bit extraction"); assert(numBits <= 64 && "Illegal bit extraction"); @@ -626,7 +634,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { if (V == 0) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingZeros(V); + Count += llvm::countl_zero(V); break; } } @@ -646,13 +654,13 @@ unsigned APInt::countLeadingOnesSlowCase() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); + unsigned Count = llvm::countl_one(U.pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { if (U.pVal[i] == WORDTYPE_MAX) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingOnes(U.pVal[i]); + Count += llvm::countl_one(U.pVal[i]); break; } } @@ -666,7 +674,7 @@ unsigned APInt::countTrailingZerosSlowCase() const { for (; i < getNumWords() && U.pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingZeros(U.pVal[i]); + Count += llvm::countr_zero(U.pVal[i]); return std::min(Count, BitWidth); } @@ -676,7 +684,7 @@ unsigned APInt::countTrailingOnesSlowCase() const { for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingOnes(U.pVal[i]); + Count += llvm::countr_one(U.pVal[i]); assert(Count <= BitWidth); return Count; } @@ -707,18 +715,18 @@ bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); if (BitWidth == 16) - return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); + return 
APInt(BitWidth, llvm::byteswap<uint16_t>(U.VAL)); if (BitWidth == 32) - return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); + return APInt(BitWidth, llvm::byteswap<uint32_t>(U.VAL)); if (BitWidth <= 64) { - uint64_t Tmp1 = ByteSwap_64(U.VAL); + uint64_t Tmp1 = llvm::byteswap<uint64_t>(U.VAL); Tmp1 >>= (64 - BitWidth); return APInt(BitWidth, Tmp1); } APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); for (unsigned I = 0, N = getNumWords(); I != N; ++I) - Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); + Result.U.pVal[I] = llvm::byteswap<uint64_t>(U.pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; @@ -767,8 +775,8 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { // Count common powers of 2 and remove all other powers of 2. unsigned Pow2; { - unsigned Pow2_A = A.countTrailingZeros(); - unsigned Pow2_B = B.countTrailingZeros(); + unsigned Pow2_A = A.countr_zero(); + unsigned Pow2_B = B.countr_zero(); if (Pow2_A > Pow2_B) { A.lshrInPlace(Pow2_A - Pow2_B); Pow2 = Pow2_B; @@ -789,10 +797,10 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { while (A != B) { if (A.ugt(B)) { A -= B; - A.lshrInPlace(A.countTrailingZeros() - Pow2); + A.lshrInPlace(A.countr_zero() - Pow2); } else { B -= A; - B.lshrInPlace(B.countTrailingZeros() - Pow2); + B.lshrInPlace(B.countr_zero() - Pow2); } } @@ -1318,7 +1326,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, // and v so that its high bits are shifted to the top of v's range without // overflow. Note that this can require an extra word in u so that u must // be of length m+n+1. 
- unsigned shift = countLeadingZeros(v[n-1]); + unsigned shift = llvm::countl_zero(v[n - 1]); uint32_t v_carry = 0; uint32_t u_carry = 0; if (shift) { @@ -1967,7 +1975,7 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { } APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { - if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { + if (countl_zero() + RHS.countl_zero() + 2 <= BitWidth) { Overflow = true; return *this * RHS; } @@ -1984,24 +1992,32 @@ APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { } APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { - Overflow = ShAmt.uge(getBitWidth()); + return sshl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow); +} + +APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { + Overflow = ShAmt >= getBitWidth(); if (Overflow) return APInt(BitWidth, 0); if (isNonNegative()) // Don't allow sign change. - Overflow = ShAmt.uge(countLeadingZeros()); + Overflow = ShAmt >= countl_zero(); else - Overflow = ShAmt.uge(countLeadingOnes()); + Overflow = ShAmt >= countl_one(); return *this << ShAmt; } APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { - Overflow = ShAmt.uge(getBitWidth()); + return ushl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow); +} + +APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const { + Overflow = ShAmt >= getBitWidth(); if (Overflow) return APInt(BitWidth, 0); - Overflow = ShAmt.ugt(countLeadingZeros()); + Overflow = ShAmt > countl_zero(); return *this << ShAmt; } @@ -2067,6 +2083,10 @@ APInt APInt::umul_sat(const APInt &RHS) const { } APInt APInt::sshl_sat(const APInt &RHS) const { + return sshl_sat(RHS.getLimitedValue(getBitWidth())); +} + +APInt APInt::sshl_sat(unsigned RHS) const { bool Overflow; APInt Res = sshl_ov(RHS, Overflow); if (!Overflow) @@ -2077,6 +2097,10 @@ APInt APInt::sshl_sat(const APInt &RHS) const { } APInt APInt::ushl_sat(const APInt &RHS) const { + return 
ushl_sat(RHS.getLimitedValue(getBitWidth())); +} + +APInt APInt::ushl_sat(unsigned RHS) const { bool Overflow; APInt Res = ushl_ov(RHS, Overflow); if (!Overflow) @@ -2136,8 +2160,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { this->negate(); } -void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, - bool Signed, bool formatAsCLiteral) const { +void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, + bool formatAsCLiteral, bool UpperCase) const { assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || Radix == 36) && "Radix should be 2, 8, 10, 16, or 36!"); @@ -2173,7 +2197,9 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, return; } - static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz" + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const char *Digits = BothDigits + (UpperCase ? 36 : 0); if (isSingleWord()) { char Buffer[65]; @@ -2290,14 +2316,6 @@ static inline APInt::WordType highHalf(APInt::WordType part) { return part >> (APInt::APINT_BITS_PER_WORD / 2); } -/// Returns the bit number of the most significant set bit of a part. -/// If the input number has no bits set -1U is returned. -static unsigned partMSB(APInt::WordType value) { return findLastSet(value); } - -/// Returns the bit number of the least significant set bit of a part. If the -/// input number has no bits set -1U is returned. -static unsigned partLSB(APInt::WordType value) { return findFirstSet(value); } - /// Sets the least significant part of a bignum to the input value, and zeroes /// out higher parts. void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { @@ -2338,32 +2356,33 @@ void APInt::tcClearBit(WordType *parts, unsigned bit) { } /// Returns the bit number of the least significant set bit of a number. If the -/// input number has no bits set -1U is returned. 
+/// input number has no bits set UINT_MAX is returned. unsigned APInt::tcLSB(const WordType *parts, unsigned n) { for (unsigned i = 0; i < n; i++) { if (parts[i] != 0) { - unsigned lsb = partLSB(parts[i]); + unsigned lsb = llvm::countr_zero(parts[i]); return lsb + i * APINT_BITS_PER_WORD; } } - return -1U; + return UINT_MAX; } /// Returns the bit number of the most significant set bit of a number. -/// If the input number has no bits set -1U is returned. +/// If the input number has no bits set UINT_MAX is returned. unsigned APInt::tcMSB(const WordType *parts, unsigned n) { do { --n; if (parts[n] != 0) { - unsigned msb = partMSB(parts[n]); + static_assert(sizeof(parts[n]) <= sizeof(uint64_t)); + unsigned msb = llvm::Log2_64(parts[n]); return msb + n * APINT_BITS_PER_WORD; } } while (n); - return -1U; + return UINT_MAX; } /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to @@ -2961,7 +2980,7 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); if (A == B) return std::nullopt; - return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); + return A.getBitWidth() - ((A ^ B).countl_zero() + 1); } APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth, diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp index b65b6824eaf8..5a9f44f304a2 100644 --- a/llvm/lib/Support/APSInt.cpp +++ b/llvm/lib/Support/APSInt.cpp @@ -25,7 +25,7 @@ APSInt::APSInt(StringRef Str) { unsigned NumBits = ((Str.size() * 64) / 19) + 2; APInt Tmp(NumBits, Str, /*radix=*/10); if (Str[0] == '-') { - unsigned MinBits = Tmp.getMinSignedBits(); + unsigned MinBits = Tmp.getSignificantBits(); if (MinBits < NumBits) Tmp = Tmp.trunc(std::max<unsigned>(1, MinBits)); *this = APSInt(Tmp, /*isUnsigned=*/false); diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp deleted file mode 100644 index 
187d5be00dae..000000000000 --- a/llvm/lib/Support/AddressRanges.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/AddressRanges.h" -#include "llvm/ADT/STLExtras.h" -#include <inttypes.h> - -using namespace llvm; - -AddressRanges::Collection::const_iterator -AddressRanges::insert(AddressRange Range) { - if (Range.size() == 0) - return Ranges.end(); - - auto It = llvm::upper_bound(Ranges, Range); - auto It2 = It; - while (It2 != Ranges.end() && It2->start() <= Range.end()) - ++It2; - if (It != It2) { - Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; - It = Ranges.erase(It, It2); - } - if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { - --It; - *It = {It->start(), std::max(It->end(), Range.end())}; - return It; - } - - return Ranges.insert(It, Range); -} - -AddressRanges::Collection::const_iterator -AddressRanges::find(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Addr; }); - - if (It == Ranges.begin()) - return Ranges.end(); - - --It; - if (Addr >= It->end()) - return Ranges.end(); - - return It; -} - -AddressRanges::Collection::const_iterator -AddressRanges::find(AddressRange Range) const { - if (Range.size() == 0) - return Ranges.end(); - - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Range.start(); }); - - if (It == Ranges.begin()) - return Ranges.end(); - - --It; - if (Range.end() > It->end()) - return Ranges.end(); - - return It; -} diff --git 
a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S index 449e07492832..69fc0936d73c 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S index bb58d2ae64b1..5ad1c641a7fc 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global _blake3_hash_many_avx2 .global blake3_hash_many_avx2 diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm index 352298edd2e8..46bad1d98f38 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm @@ -1,11 +1,11 @@ -public _blake3_hash_many_avx2 -public blake3_hash_many_avx2 +public _llvm_blake3_hash_many_avx2 +public llvm_blake3_hash_many_avx2 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_avx2 PROC -_blake3_hash_many_avx2 PROC +llvm_blake3_hash_many_avx2 PROC +_llvm_blake3_hash_many_avx2 PROC push r15 push r14 push r13 @@ -1785,8 +1785,8 @@ endroundloop1: vmovdqu xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_avx2 ENDP -blake3_hash_many_avx2 ENDP +_llvm_blake3_hash_many_avx2 ENDP +llvm_blake3_hash_many_avx2 ENDP _TEXT ENDS _RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S index 3afc0e2250e2..f04a135dd1bc 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S +++ 
b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S index e10b9f36cbcc..53c586141fbe 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global _blake3_hash_many_avx512 diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm index b19efbaaeb36..f13d1b260ab8 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_avx512 -public blake3_hash_many_avx512 -public blake3_compress_in_place_avx512 -public _blake3_compress_in_place_avx512 -public blake3_compress_xof_avx512 -public _blake3_compress_xof_avx512 +public _llvm_blake3_hash_many_avx512 +public llvm_blake3_hash_many_avx512 +public llvm_blake3_compress_in_place_avx512 +public _llvm_blake3_compress_in_place_avx512 +public llvm_blake3_compress_xof_avx512 +public _llvm_blake3_compress_xof_avx512 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_avx512 PROC -_blake3_hash_many_avx512 PROC +llvm_blake3_hash_many_avx512 PROC +_llvm_blake3_hash_many_avx512 PROC push r15 push r14 push r13 @@ -2404,12 +2404,12 @@ endroundloop1: vmovdqu xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_avx512 ENDP -blake3_hash_many_avx512 ENDP +_llvm_blake3_hash_many_avx512 ENDP +llvm_blake3_hash_many_avx512 ENDP ALIGN 16 -blake3_compress_in_place_avx512 PROC -_blake3_compress_in_place_avx512 PROC +llvm_blake3_compress_in_place_avx512 PROC 
+_llvm_blake3_compress_in_place_avx512 PROC sub rsp, 72 vmovdqa xmmword ptr [rsp], xmm6 vmovdqa xmmword ptr [rsp+10H], xmm7 @@ -2498,12 +2498,12 @@ _blake3_compress_in_place_avx512 PROC vmovdqa xmm9, xmmword ptr [rsp+30H] add rsp, 72 ret -_blake3_compress_in_place_avx512 ENDP -blake3_compress_in_place_avx512 ENDP +_llvm_blake3_compress_in_place_avx512 ENDP +llvm_blake3_compress_in_place_avx512 ENDP ALIGN 16 -blake3_compress_xof_avx512 PROC -_blake3_compress_xof_avx512 PROC +llvm_blake3_compress_xof_avx512 PROC +_llvm_blake3_compress_xof_avx512 PROC sub rsp, 72 vmovdqa xmmword ptr [rsp], xmm6 vmovdqa xmmword ptr [rsp+10H], xmm7 @@ -2597,8 +2597,8 @@ _blake3_compress_xof_avx512 PROC vmovdqa xmm9, xmmword ptr [rsp+30H] add rsp, 72 ret -_blake3_compress_xof_avx512 ENDP -blake3_compress_xof_avx512 ENDP +_llvm_blake3_compress_xof_avx512 ENDP +llvm_blake3_compress_xof_avx512 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h index 180d0a6eeda8..8e5456d745cd 100644 --- a/llvm/lib/Support/BLAKE3/blake3_impl.h +++ b/llvm/lib/Support/BLAKE3/blake3_impl.h @@ -11,15 +11,7 @@ // For \p LLVM_LIBRARY_VISIBILITY #include "llvm/Support/Compiler.h" -// Remove the 'llvm_' prefix for the rest of the internal implementation. 
-#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING -#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN -#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN -#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN -#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN -#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH -#define blake3_hasher llvm_blake3_hasher -#define blake3_chunk_state llvm_blake3_chunk_state +#include "llvm_blake3_prefix.h" // internal flags enum blake3_flags { diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S index 0106b13ba851..9a4f5eb7318b 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S index 8852ba5976e1..bf3b4523a9f1 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global blake3_hash_many_sse2 .global _blake3_hash_many_sse2 diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm index 507502f11a80..1069c8df4ed6 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_sse2 -public blake3_hash_many_sse2 -public blake3_compress_in_place_sse2 -public _blake3_compress_in_place_sse2 -public blake3_compress_xof_sse2 -public _blake3_compress_xof_sse2 +public _llvm_blake3_hash_many_sse2 +public llvm_blake3_hash_many_sse2 +public llvm_blake3_compress_in_place_sse2 +public 
_llvm_blake3_compress_in_place_sse2 +public llvm_blake3_compress_xof_sse2 +public _llvm_blake3_compress_xof_sse2 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_sse2 PROC -_blake3_hash_many_sse2 PROC +llvm_blake3_hash_many_sse2 PROC +_llvm_blake3_hash_many_sse2 PROC push r15 push r14 push r13 @@ -2034,11 +2034,11 @@ endroundloop1: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_sse2 ENDP -blake3_hash_many_sse2 ENDP +_llvm_blake3_hash_many_sse2 ENDP +llvm_blake3_hash_many_sse2 ENDP -blake3_compress_in_place_sse2 PROC -_blake3_compress_in_place_sse2 PROC +llvm_blake3_compress_in_place_sse2 PROC +_llvm_blake3_compress_in_place_sse2 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2164,12 +2164,12 @@ _blake3_compress_in_place_sse2 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_in_place_sse2 ENDP -blake3_compress_in_place_sse2 ENDP +_llvm_blake3_compress_in_place_sse2 ENDP +llvm_blake3_compress_in_place_sse2 ENDP ALIGN 16 -blake3_compress_xof_sse2 PROC -_blake3_compress_xof_sse2 PROC +llvm_blake3_compress_xof_sse2 PROC +_llvm_blake3_compress_xof_sse2 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2302,8 +2302,8 @@ _blake3_compress_xof_sse2 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_xof_sse2 ENDP -blake3_compress_xof_sse2 ENDP +_llvm_blake3_compress_xof_sse2 ENDP +llvm_blake3_compress_xof_sse2 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S index 4e918c5bb2cc..1be4ed744426 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git 
a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S index 60d0a4042e71..28bdf3890a29 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global blake3_hash_many_sse41 .global _blake3_hash_many_sse41 diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm index 8966c7b84406..770935372cd9 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_sse41 -public blake3_hash_many_sse41 -public blake3_compress_in_place_sse41 -public _blake3_compress_in_place_sse41 -public blake3_compress_xof_sse41 -public _blake3_compress_xof_sse41 +public _llvm_blake3_hash_many_sse41 +public llvm_blake3_hash_many_sse41 +public llvm_blake3_compress_in_place_sse41 +public _llvm_blake3_compress_in_place_sse41 +public llvm_blake3_compress_xof_sse41 +public _llvm_blake3_compress_xof_sse41 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_sse41 PROC -_blake3_hash_many_sse41 PROC +llvm_blake3_hash_many_sse41 PROC +_llvm_blake3_hash_many_sse41 PROC push r15 push r14 push r13 @@ -1797,11 +1797,11 @@ endroundloop1: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_sse41 ENDP -blake3_hash_many_sse41 ENDP +_llvm_blake3_hash_many_sse41 ENDP +llvm_blake3_hash_many_sse41 ENDP -blake3_compress_in_place_sse41 PROC -_blake3_compress_in_place_sse41 PROC +llvm_blake3_compress_in_place_sse41 PROC +_llvm_blake3_compress_in_place_sse41 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -1916,12 +1916,12 @@ _blake3_compress_in_place_sse41 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret 
-_blake3_compress_in_place_sse41 ENDP -blake3_compress_in_place_sse41 ENDP +_llvm_blake3_compress_in_place_sse41 ENDP +llvm_blake3_compress_in_place_sse41 ENDP ALIGN 16 -blake3_compress_xof_sse41 PROC -_blake3_compress_xof_sse41 PROC +llvm_blake3_compress_xof_sse41 PROC +_llvm_blake3_compress_xof_sse41 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2043,8 +2043,8 @@ _blake3_compress_xof_sse41 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_xof_sse41 ENDP -blake3_compress_xof_sse41 ENDP +_llvm_blake3_compress_xof_sse41 ENDP +llvm_blake3_compress_xof_sse41 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h new file mode 100644 index 000000000000..3cee3691e4cf --- /dev/null +++ b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h @@ -0,0 +1,41 @@ +#ifndef LLVM_BLAKE3_PREFIX_H +#define LLVM_BLAKE3_PREFIX_H + +#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING +#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN +#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN +#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN +#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN +#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH +#define blake3_hasher llvm_blake3_hasher +#define blake3_chunk_state llvm_blake3_chunk_state +#define blake3_compress_in_place llvm_blake3_compress_in_place +#define blake3_compress_xof llvm_blake3_compress_xof +#define blake3_hash_many llvm_blake3_hash_many +#define blake3_simd_degree llvm_blake3_simd_degree +#define blake3_compress_in_place_portable llvm_blake3_compress_in_place_portable +#define blake3_compress_xof_portable llvm_blake3_compress_xof_portable +#define blake3_hash_many_portable llvm_blake3_hash_many_portable +#define blake3_compress_in_place_sse2 llvm_blake3_compress_in_place_sse2 +#define _blake3_compress_in_place_sse2 _llvm_blake3_compress_in_place_sse2 +#define blake3_compress_xof_sse2 llvm_blake3_compress_xof_sse2 +#define 
_blake3_compress_xof_sse2 _llvm_blake3_compress_xof_sse2 +#define blake3_hash_many_sse2 llvm_blake3_hash_many_sse2 +#define _blake3_hash_many_sse2 _llvm_blake3_hash_many_sse2 +#define blake3_compress_in_place_sse41 llvm_blake3_compress_in_place_sse41 +#define _blake3_compress_in_place_sse41 _llvm_blake3_compress_in_place_sse41 +#define blake3_compress_xof_sse41 llvm_blake3_compress_xof_sse41 +#define _blake3_compress_xof_sse41 _llvm_blake3_compress_xof_sse41 +#define blake3_hash_many_sse41 llvm_blake3_hash_many_sse41 +#define _blake3_hash_many_sse41 _llvm_blake3_hash_many_sse41 +#define blake3_hash_many_avx2 llvm_blake3_hash_many_avx2 +#define _blake3_hash_many_avx2 _llvm_blake3_hash_many_avx2 +#define blake3_compress_in_place_avx512 llvm_blake3_compress_in_place_avx512 +#define _blake3_compress_in_place_avx512 _llvm_blake3_compress_in_place_avx512 +#define blake3_compress_xof_avx512 llvm_blake3_compress_xof_avx512 +#define _blake3_compress_xof_avx512 _llvm_blake3_compress_xof_avx512 +#define blake3_hash_many_avx512 llvm_blake3_hash_many_avx512 +#define _blake3_hash_many_avx512 _llvm_blake3_hash_many_avx512 +#define blake3_hash_many_neon llvm_blake3_hash_many_neon + +#endif /* LLVM_BLAKE3_PREFIX_H */ diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp new file mode 100644 index 000000000000..113e9484f528 --- /dev/null +++ b/llvm/lib/Support/BalancedPartitioning.cpp @@ -0,0 +1,337 @@ +//===- BalancedPartitioning.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements BalancedPartitioning, a recursive balanced graph +// partitioning algorithm. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BalancedPartitioning.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ThreadPool.h" + +using namespace llvm; +#define DEBUG_TYPE "balanced-partitioning" + +void BPFunctionNode::dump(raw_ostream &OS) const { + OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id, + make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket); +} + +template <typename Func> +void BalancedPartitioning::BPThreadPool::async(Func &&F) { +#if LLVM_ENABLE_THREADS + // This new thread could spawn more threads, so mark it as active + ++NumActiveThreads; + TheThreadPool.async([=]() { + // Run the task + F(); + + // This thread will no longer spawn new threads, so mark it as inactive + if (--NumActiveThreads == 0) { + // There are no more active threads, so mark as finished and notify + { + std::unique_lock<std::mutex> lock(mtx); + assert(!IsFinishedSpawning); + IsFinishedSpawning = true; + } + cv.notify_one(); + } + }); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +void BalancedPartitioning::BPThreadPool::wait() { +#if LLVM_ENABLE_THREADS + // TODO: We could remove the mutex and condition variable and use + // std::atomic::wait() instead, but that isn't available until C++20 + { + std::unique_lock<std::mutex> lock(mtx); + cv.wait(lock, [&]() { return IsFinishedSpawning; }); + assert(IsFinishedSpawning && NumActiveThreads == 0); + } + // Now we can call ThreadPool::wait() since all tasks have been submitted + TheThreadPool.wait(); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +BalancedPartitioning::BalancedPartitioning( + const BalancedPartitioningConfig &Config) + : Config(Config) { + // Pre-computing log2 values + Log2Cache[0] = 0.0; + for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) + Log2Cache[I] = std::log2(I); +} + 
+void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { + LLVM_DEBUG( + dbgs() << format( + "Partitioning %d nodes using depth %d and %d iterations per split\n", + Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); + std::optional<BPThreadPool> TP; +#if LLVM_ENABLE_THREADS + ThreadPool TheThreadPool; + if (Config.TaskSplitDepth > 1) + TP.emplace(TheThreadPool); +#endif + + // Record the input order + for (unsigned I = 0; I < Nodes.size(); I++) + Nodes[I].InputOrderIndex = I; + + auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end()); + auto BisectTask = [=, &TP]() { + bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); + }; + if (TP) { + TP->async(std::move(BisectTask)); + TP->wait(); + } else { + BisectTask(); + } + + llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) { + return L.Bucket < R.Bucket; + }); + + LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n"); +} + +void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned RootBucket, + unsigned Offset, + std::optional<BPThreadPool> &TP) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { + // We've reach the lowest level of the recursion tree. Fall back to the + // original order and assign to buckets. 
+ llvm::stable_sort(Nodes, [](const auto &L, const auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + for (auto &N : Nodes) + N.Bucket = Offset++; + return; + } + + LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n", + NumNodes, RootBucket)); + + std::mt19937 RNG(RootBucket); + + unsigned LeftBucket = 2 * RootBucket; + unsigned RightBucket = 2 * RootBucket + 1; + + // Split into two and assign to the left and right buckets + split(Nodes, LeftBucket); + + runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG); + + // Split nodes wrt the resulting buckets + auto NodesMid = + llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; }); + unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid); + + auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid); + auto RightNodes = llvm::make_range(NodesMid, Nodes.end()); + + auto LeftRecTask = [=, &TP]() { + bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP); + }; + auto RightRecTask = [=, &TP]() { + bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP); + }; + + if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { + TP->async(std::move(LeftRecTask)); + TP->async(std::move(RightRecTask)); + } else { + LeftRecTask(); + RightRecTask(); + } +} + +void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned LeftBucket, + unsigned RightBucket, + std::mt19937 &RNG) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + ++UtilityNodeDegree[UN]; + // Remove utility nodes if they have just one edge or are connected to all + // functions + for (auto &N : Nodes) + llvm::erase_if(N.UtilityNodes, [&](auto &UN) { + return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes; + }); + + // Renumber utility nodes so they can be used to index into 
Signatures + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + if (!UtilityNodeIndex.count(UN)) + UtilityNodeIndex[UN] = UtilityNodeIndex.size(); + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + UN = UtilityNodeIndex[UN]; + + // Initialize signatures + SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); + for (auto &N : Nodes) { + for (auto &UN : N.UtilityNodes) { + assert(UN < Signatures.size()); + if (N.Bucket == LeftBucket) { + Signatures[UN].LeftCount++; + } else { + Signatures[UN].RightCount++; + } + } + } + + for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { + unsigned NumMovedNodes = + runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); + if (NumMovedNodes == 0) + break; + } +} + +unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Init signature cost caches + for (auto &Signature : Signatures) { + if (Signature.CachedGainIsValid) + continue; + unsigned L = Signature.LeftCount; + unsigned R = Signature.RightCount; + assert((L > 0 || R > 0) && "incorrect signature"); + float Cost = logCost(L, R); + Signature.CachedGainLR = 0.f; + Signature.CachedGainRL = 0.f; + if (L > 0) + Signature.CachedGainLR = Cost - logCost(L - 1, R + 1); + if (R > 0) + Signature.CachedGainRL = Cost - logCost(L + 1, R - 1); + Signature.CachedGainIsValid = true; + } + + // Compute move gains + typedef std::pair<float, BPFunctionNode *> GainPair; + std::vector<GainPair> Gains; + for (auto &N : Nodes) { + bool FromLeftToRight = (N.Bucket == LeftBucket); + float Gain = moveGain(N, FromLeftToRight, Signatures); + Gains.push_back(std::make_pair(Gain, &N)); + } + + // Collect left and right gains + auto LeftEnd = llvm::partition( + Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; }); + auto LeftRange = llvm::make_range(Gains.begin(), 
LeftEnd); + auto RightRange = llvm::make_range(LeftEnd, Gains.end()); + + // Sort gains in descending order + auto LargerGain = [](const auto &L, const auto &R) { + return L.first > R.first; + }; + llvm::stable_sort(LeftRange, LargerGain); + llvm::stable_sort(RightRange, LargerGain); + + unsigned NumMovedDataVertices = 0; + for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) { + auto &[LeftGain, LeftNode] = LeftPair; + auto &[RightGain, RightNode] = RightPair; + // Stop when the gain is no longer beneficial + if (LeftGain + RightGain <= 0.f) + break; + // Try to exchange the nodes between buckets + if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + } + return NumMovedDataVertices; +} + +bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Sometimes we skip the move. This helps to escape local optima + if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= + Config.SkipProbability) + return false; + + bool FromLeftToRight = (N.Bucket == LeftBucket); + // Update the current bucket + N.Bucket = (FromLeftToRight ? 
RightBucket : LeftBucket); + + // Update signatures and invalidate gain cache + if (FromLeftToRight) { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount--; + Signature.RightCount++; + Signature.CachedGainIsValid = false; + } + } else { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount++; + Signature.RightCount--; + Signature.CachedGainIsValid = false; + } + } + return true; +} + +void BalancedPartitioning::split(const FunctionNodeRange Nodes, + unsigned StartBucket) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; + + std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + + for (auto &N : llvm::make_range(Nodes.begin(), NodesMid)) + N.Bucket = StartBucket; + for (auto &N : llvm::make_range(NodesMid, Nodes.end())) + N.Bucket = StartBucket + 1; +} + +float BalancedPartitioning::moveGain(const BPFunctionNode &N, + bool FromLeftToRight, + const SignaturesT &Signatures) { + float Gain = 0.f; + for (auto &UN : N.UtilityNodes) + Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR + : Signatures[UN].CachedGainRL); + return Gain; +} + +float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { + return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1)); +} + +float BalancedPartitioning::log2Cached(unsigned i) const { + return (i < LOG_CACHE_SIZE) ? 
Log2Cache[i] : std::log2(i); +} diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp index dc4ea200c7be..3d87a30a86a1 100644 --- a/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/LEB128.h" diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp index 702165ac480b..a4a1e477d940 100644 --- a/llvm/lib/Support/BlockFrequency.cpp +++ b/llvm/lib/Support/BlockFrequency.cpp @@ -12,7 +12,6 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" -#include <cassert> using namespace llvm; @@ -37,47 +36,3 @@ BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { Freq /= Prob; return Freq; } - -BlockFrequency &BlockFrequency::operator+=(BlockFrequency Freq) { - uint64_t Before = Freq.Frequency; - Frequency += Freq.Frequency; - - // If overflow, set frequency to the maximum value. - if (Frequency < Before) - Frequency = UINT64_MAX; - - return *this; -} - -BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const { - BlockFrequency NewFreq(Frequency); - NewFreq += Freq; - return NewFreq; -} - -BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) { - // If underflow, set frequency to 0. - if (Frequency <= Freq.Frequency) - Frequency = 0; - else - Frequency -= Freq.Frequency; - return *this; -} - -BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const { - BlockFrequency NewFreq(Frequency); - NewFreq -= Freq; - return NewFreq; -} - -BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { - // Frequency can never be 0 by design. - assert(Frequency != 0); - - // Shift right by count. - Frequency >>= count; - - // Saturate to 1 if we are 0. 
- Frequency |= Frequency == 0; - return *this; -} diff --git a/llvm/lib/Support/Chrono.cpp b/llvm/lib/Support/Chrono.cpp index 8c28d45d8822..859ece8f5500 100644 --- a/llvm/lib/Support/Chrono.cpp +++ b/llvm/lib/Support/Chrono.cpp @@ -74,7 +74,7 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, continue; case 'N': // Nanoseconds, from date(1). FStream << llvm::format( - "%.6lu", (long)duration_cast<nanoseconds>(Fractional).count()); + "%.9lu", (long)duration_cast<nanoseconds>(Fractional).count()); ++I; continue; case '%': // Consume %%, so %%f parses as (%%)f not %(%f) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 66632504d6fb..d3efb8b67be5 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -208,8 +208,7 @@ public: bool HadErrors = false; if (O->hasArgStr()) { // If it's a DefaultOption, check to make sure it isn't already there. - if (O->isDefaultOption() && - SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end()) + if (O->isDefaultOption() && SC->OptionsMap.contains(O->ArgStr)) return; // Add argument to the argument map! @@ -2758,7 +2757,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) { initCommonOptions(); auto &Subs = GlobalParser->RegisteredSubCommands; (void)Subs; - assert(is_contained(Subs, &Sub)); + assert(Subs.contains(&Sub)); return Sub.OptionsMap; } diff --git a/llvm/lib/Support/ConvertEBCDIC.cpp b/llvm/lib/Support/ConvertEBCDIC.cpp new file mode 100644 index 000000000000..08eeaa52a6c9 --- /dev/null +++ b/llvm/lib/Support/ConvertEBCDIC.cpp @@ -0,0 +1,123 @@ +//===--- ConvertEBCDIC.cpp - UTF8/EBCDIC CharSet Conversion -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides utility functions for converting between EBCDIC-1047 and +/// UTF-8. +/// +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ConvertEBCDIC.h" + +using namespace llvm; + +static const unsigned char ISO88591ToIBM1047[256] = { + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x15, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, + 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 0x40, 0x5a, 0x7f, 0x7b, + 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, + 0x4c, 0x7e, 0x6e, 0x6f, 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x5f, 0x6d, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, + 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, + 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, + 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, + 0x04, 0x14, 0x3e, 0xff, 0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, + 0xbb, 0xb4, 0x9a, 0x8a, 0xb0, 0xca, 0xaf, 0xbc, 0x90, 0x8f, 0xea, 0xfa, + 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab, + 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73, + 0x78, 0x75, 0x76, 0x77, 0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, + 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xba, 0xae, 0x59, 0x44, 0x45, 0x42, 0x46, + 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, + 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 
0xe1, 0x70, 0xdd, 0xde, 0xdb, + 0xdc, 0x8d, 0x8e, 0xdf}; + +static const unsigned char IBM1047ToISO88591[256] = { + 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87, + 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, + 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, + 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 0x26, 0xe9, 0xea, 0xeb, + 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, + 0x25, 0x5f, 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, + 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 0xd8, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, + 0xe6, 0xb8, 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3, 0xa5, 0xb7, + 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7, + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, + 0xf6, 0xf2, 0xf3, 0xf5, 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb, + 0xdc, 0xd9, 0xda, 0x9f}; + +std::error_code +ConverterEBCDIC::convertToEBCDIC(StringRef Source, + SmallVectorImpl<char> &Result) { + assert(Result.empty() && "Result must be empty!"); + const unsigned char *Table = ISO88591ToIBM1047; + const unsigned char *Ptr = + reinterpret_cast<const unsigned char *>(Source.data()); + size_t 
Length = Source.size(); + Result.reserve(Length); + while (Length--) { + unsigned char Ch = *Ptr++; + // Handle UTF-8 2-byte-sequences in input. + if (Ch >= 128) { + // Only two-byte sequences can be decoded. + if (Ch != 0xc2 && Ch != 0xc3) + return std::make_error_code(std::errc::illegal_byte_sequence); + // Is buffer truncated? + if (!Length) + return std::make_error_code(std::errc::invalid_argument); + unsigned char Ch2 = *Ptr++; + // Is second byte well-formed? + if ((Ch2 & 0xc0) != 0x80) + return std::make_error_code(std::errc::illegal_byte_sequence); + Ch = Ch2 | (Ch << 6); + Length--; + } + // Translate the character. + Ch = Table[Ch]; + Result.push_back(static_cast<char>(Ch)); + } + return std::error_code(); +} + +void ConverterEBCDIC::convertToUTF8(StringRef Source, + SmallVectorImpl<char> &Result) { + assert(Result.empty() && "Result must be empty!"); + + const unsigned char *Table = IBM1047ToISO88591; + const unsigned char *Ptr = + reinterpret_cast<const unsigned char *>(Source.data()); + size_t Length = Source.size(); + Result.reserve(Length); + while (Length--) { + unsigned char Ch = *Ptr++; + // Translate the character. + Ch = Table[Ch]; + // Handle UTF-8 2-byte-sequences in output. + if (Ch >= 128) { + // First byte prefixed with either 0xc2 or 0xc3. + Result.push_back(static_cast<char>(0xc0 | (Ch >> 6))); + // Second byte is either the same as the ASCII byte or ASCII byte -64. 
+ Ch = Ch & 0xbf; + } + Result.push_back(static_cast<char>(Ch)); + } +} diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp index 9bf3f8f8b897..3fa7365e72d3 100644 --- a/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -102,7 +102,7 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); for (UTF16 &I : ByteSwapped) - I = llvm::ByteSwap_16(I); + I = llvm::byteswap<uint16_t>(I); Src = &ByteSwapped[0]; SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; } @@ -160,7 +160,7 @@ bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) { ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); for (UTF32 &I : ByteSwapped) - I = llvm::ByteSwap_32(I); + I = llvm::byteswap<uint32_t>(I); Src = &ByteSwapped[0]; SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; } diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp index e96a9b59d834..f53aea177d61 100644 --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -431,7 +431,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { [[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) { #if defined(_WIN32) - // SEH and VEH + // Since the exception code is actually of NTSTATUS type, we use the + // Microsoft-recommended 0xE prefix, to signify that this is a user error. + // This value is a combination of the customer field (bit 29) and severity + // field (bits 30-31) in the NTSTATUS specification. 
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL); #else // On Unix we don't need to raise an exception, we go directly to @@ -445,10 +448,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { bool CrashRecoveryContext::isCrash(int RetCode) { #if defined(_WIN32) - // On Windows, the high bits are reserved for kernel return codes. Values - // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000 - // and up are for "errors". In practice, both are interpreted as a - // non-continuable signal. + // On Windows, the code is interpreted as NTSTATUS. The two high bits + // represent the severity. Values starting with 0x80000000 are reserved for + // "warnings"; values of 0xC0000000 and up are for "errors". In practice, both + // are interpreted as a non-continuable signal. unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28; if (Code != 0xC && Code != 8) return false; diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp index 8cf312191153..59a44f4071b5 100644 --- a/llvm/lib/Support/DataExtractor.cpp +++ b/llvm/lib/Support/DataExtractor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" diff --git a/llvm/lib/Support/DebugOptions.h b/llvm/lib/Support/DebugOptions.h index 75e557d7d8d7..db727d5a584c 100644 --- a/llvm/lib/Support/DebugOptions.h +++ b/llvm/lib/Support/DebugOptions.h @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_SUPPORT_DEBUGOPTIONS_H +#define LLVM_SUPPORT_DEBUGOPTIONS_H + namespace llvm { // These are invoked internally before parsing command line options. 
@@ -27,3 +30,5 @@ void initDebugOptions(); void initRandomSeedOptions(); } // namespace llvm + +#endif // LLVM_SUPPORT_DEBUGOPTIONS_H diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp index e7072d94e49c..8150bd83c79f 100644 --- a/llvm/lib/Support/DivisionByConstantInfo.cpp +++ b/llvm/lib/Support/DivisionByConstantInfo.cpp @@ -132,7 +132,7 @@ UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros, (Q1.ult(Delta) || (Q1 == Delta && R1.isZero()))); if (Retval.IsAdd && !D[0] && AllowEvenDivisorOptimization) { - unsigned PreShift = D.countTrailingZeros(); + unsigned PreShift = D.countr_zero(); APInt ShiftedD = D.lshr(PreShift); Retval = UnsignedDivisionByConstantInfo::get(ShiftedD, LeadingZeros + PreShift); diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp index a5a0676b1077..2e90b70dc83f 100644 --- a/llvm/lib/Support/ELFAttributeParser.cpp +++ b/llvm/lib/Support/ELFAttributeParser.cpp @@ -127,10 +127,14 @@ Error ELFAttributeParser::parseSubsection(uint32_t length) { sw->printString("Vendor", vendorName); } - // Ignore unrecognized vendor-name. - if (vendorName.lower() != vendor) - return createStringError(errc::invalid_argument, - "unrecognized vendor-name: " + vendorName); + // Handle a subsection with an unrecognized vendor-name by skipping + // over it to the next subsection. ADDENDA32 in the Arm ABI defines + // that vendor attribute sections must not affect compatibility, so + // this should always be safe. 
+ if (vendorName.lower() != vendor) { + cursor.seek(end); + return Error::success(); + } while (cursor.tell() < end) { /// Tag_File | Tag_Section | Tag_Symbol uleb128:byte-size diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp index 7f665be8db6c..60a7e536b6c5 100644 --- a/llvm/lib/Support/Errno.cpp +++ b/llvm/lib/Support/Errno.cpp @@ -55,17 +55,11 @@ std::string StrError(int errnum) { #elif HAVE_DECL_STRERROR_S // "Windows Secure API" strerror_s(buffer, MaxErrStrLen - 1, errnum); str = buffer; -#elif defined(HAVE_STRERROR) +#else // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. str = strerror(errnum); -#else - // Strange that this system doesn't even have strerror - // but, oh well, just use a generic message - raw_string_ostream stream(str); - stream << "Error #" << errnum; - stream.flush(); #endif return str; } diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp index fbe86f2b59e1..21d591530b41 100644 --- a/llvm/lib/Support/Error.cpp +++ b/llvm/lib/Support/Error.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Error.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include <system_error> @@ -70,6 +72,15 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) { }); } +/// Write all error messages (if any) in E to a string. The newline character +/// is used to separate error messages. 
+std::string toString(Error E) { + SmallVector<std::string, 2> Errors; + handleAllErrors(std::move(E), [&Errors](const ErrorInfoBase &EI) { + Errors.push_back(EI.message()); + }); + return join(Errors.begin(), Errors.end(), "\n"); +} std::error_code ErrorList::convertToErrorCode() const { return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors), @@ -149,7 +160,7 @@ void report_fatal_error(Error Err, bool GenCrashDiag) { raw_string_ostream ErrStream(ErrMsg); logAllUnhandledErrors(std::move(Err), ErrStream); } - report_fatal_error(Twine(ErrMsg)); + report_fatal_error(Twine(ErrMsg), GenCrashDiag); } } // end namespace llvm diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index d01a41a46489..dbd6c324cf4d 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -169,7 +169,7 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the /// files match, 1 if they are different, and 2 if there is a file error. This -/// function differs from DiffFiles in that you can specify an absolete and +/// function differs from DiffFiles in that you can specify an absolute and /// relative FP error that is allowed to exist. 
If you specify a string to fill /// in for the error option, it will set the string to an error message if an /// error occurs, allowing the caller to distinguish between a failed diff and a @@ -267,64 +267,6 @@ int llvm::DiffFilesWithTolerance(StringRef NameA, return CompareFailed; } -void llvm::AtomicFileWriteError::log(raw_ostream &OS) const { - OS << "atomic_write_error: "; - switch (Error) { - case atomic_write_error::failed_to_create_uniq_file: - OS << "failed_to_create_uniq_file"; - return; - case atomic_write_error::output_stream_error: - OS << "output_stream_error"; - return; - case atomic_write_error::failed_to_rename_temp_file: - OS << "failed_to_rename_temp_file"; - return; - } - llvm_unreachable("unknown atomic_write_error value in " - "failed_to_rename_temp_file::log()"); -} - -llvm::Error llvm::writeFileAtomically(StringRef TempPathModel, - StringRef FinalPath, StringRef Buffer) { - return writeFileAtomically(TempPathModel, FinalPath, - [&Buffer](llvm::raw_ostream &OS) { - OS.write(Buffer.data(), Buffer.size()); - return llvm::Error::success(); - }); -} - -llvm::Error llvm::writeFileAtomically( - StringRef TempPathModel, StringRef FinalPath, - std::function<llvm::Error(llvm::raw_ostream &)> Writer) { - SmallString<128> GeneratedUniqPath; - int TempFD; - if (sys::fs::createUniqueFile(TempPathModel, TempFD, GeneratedUniqPath)) { - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::failed_to_create_uniq_file); - } - llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath); - - raw_fd_ostream OS(TempFD, /*shouldClose=*/true); - if (llvm::Error Err = Writer(OS)) { - return Err; - } - - OS.close(); - if (OS.has_error()) { - OS.clear_error(); - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::output_stream_error); - } - - if (sys::fs::rename(/*from=*/GeneratedUniqPath, /*to=*/FinalPath)) { - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::failed_to_rename_temp_file); - } - - 
RemoveTmpFileOnFail.releaseFile(); - return Error::success(); -} - Expected<FilePermissionsApplier> FilePermissionsApplier::create(StringRef InputFilename) { sys::fs::file_status Status; @@ -389,5 +331,3 @@ Error FilePermissionsApplier::apply( return Error::success(); } - -char llvm::AtomicFileWriteError::ID; diff --git a/llvm/lib/Support/FloatingPointMode.cpp b/llvm/lib/Support/FloatingPointMode.cpp new file mode 100644 index 000000000000..9543884ff46e --- /dev/null +++ b/llvm/lib/Support/FloatingPointMode.cpp @@ -0,0 +1,95 @@ +//===- FloatingPointMode.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/FloatingPointMode.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +FPClassTest llvm::fneg(FPClassTest Mask) { + FPClassTest NewMask = Mask & fcNan; + if (Mask & fcNegInf) + NewMask |= fcPosInf; + if (Mask & fcNegNormal) + NewMask |= fcPosNormal; + if (Mask & fcNegSubnormal) + NewMask |= fcPosSubnormal; + if (Mask & fcNegZero) + NewMask |= fcPosZero; + if (Mask & fcPosZero) + NewMask |= fcNegZero; + if (Mask & fcPosSubnormal) + NewMask |= fcNegSubnormal; + if (Mask & fcPosNormal) + NewMask |= fcNegNormal; + if (Mask & fcPosInf) + NewMask |= fcNegInf; + return NewMask; +} + +FPClassTest llvm::fabs(FPClassTest Mask) { + FPClassTest NewMask = Mask & fcNan; + if (Mask & fcPosZero) + NewMask |= fcZero; + if (Mask & fcPosSubnormal) + NewMask |= fcSubnormal; + if (Mask & fcPosNormal) + NewMask |= fcNormal; + if (Mask & fcPosInf) + NewMask |= fcInf; + return NewMask; +} + +// Every bitfield has a unique name and one or more aliasing names that cover +// multiple bits. 
Names should be listed in order of preference, with higher +// popcounts listed first. +// +// Bits are consumed as printed. Each field should only be represented in one +// printed field. +static constexpr std::pair<FPClassTest, StringLiteral> NoFPClassName[] = { + {fcAllFlags, "all"}, + {fcNan, "nan"}, + {fcSNan, "snan"}, + {fcQNan, "qnan"}, + {fcInf, "inf"}, + {fcNegInf, "ninf"}, + {fcPosInf, "pinf"}, + {fcZero, "zero"}, + {fcNegZero, "nzero"}, + {fcPosZero, "pzero"}, + {fcSubnormal, "sub"}, + {fcNegSubnormal, "nsub"}, + {fcPosSubnormal, "psub"}, + {fcNormal, "norm"}, + {fcNegNormal, "nnorm"}, + {fcPosNormal, "pnorm"} +}; + +raw_ostream &llvm::operator<<(raw_ostream &OS, FPClassTest Mask) { + OS << '('; + + if (Mask == fcNone) { + OS << "none)"; + return OS; + } + + ListSeparator LS(" "); + for (auto [BitTest, Name] : NoFPClassName) { + if ((Mask & BitTest) == BitTest) { + OS << LS << Name; + + // Clear the bits so we don't print any aliased names later. + Mask &= ~BitTest; + } + } + + assert(Mask == 0 && "didn't print some mask bits"); + + OS << ')'; + return OS; +} diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp index ece31b971c1c..419bf6740768 100644 --- a/llvm/lib/Support/FoldingSet.cpp +++ b/llvm/lib/Support/FoldingSet.cpp @@ -269,7 +269,7 @@ void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { // range of 1.0 - 2.0. if(EltCount < capacity()) return; - GrowBucketCount(PowerOf2Floor(EltCount), Info); + GrowBucketCount(llvm::bit_floor(EltCount), Info); } /// FindNodeOrInsertPos - Look up the node specified by ID. 
If it exists, diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp deleted file mode 100644 index d95d84f7837e..000000000000 --- a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/ItaniumManglingCanonicalizer.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Demangle/ItaniumDemangle.h" -#include "llvm/Support/Allocator.h" - -using namespace llvm; -using llvm::itanium_demangle::ForwardTemplateReference; -using llvm::itanium_demangle::Node; -using llvm::itanium_demangle::NodeKind; -using llvm::itanium_demangle::StringView; - -namespace { -struct FoldingSetNodeIDBuilder { - llvm::FoldingSetNodeID &ID; - void operator()(const Node *P) { ID.AddPointer(P); } - void operator()(StringView Str) { - ID.AddString(llvm::StringRef(Str.begin(), Str.size())); - } - template <typename T> - std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) { - ID.AddInteger((unsigned long long)V); - } - void operator()(itanium_demangle::NodeArray A) { - ID.AddInteger(A.size()); - for (const Node *N : A) - (*this)(N); - } -}; - -template<typename ...T> -void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { - FoldingSetNodeIDBuilder Builder = {ID}; - Builder(K); - int VisitInOrder[] = { - (Builder(V), 0) ..., - 0 // Avoid empty array if there are no arguments. - }; - (void)VisitInOrder; -} - -// FIXME: Convert this to a generic lambda when possible. 
-template<typename NodeT> struct ProfileSpecificNode { - FoldingSetNodeID &ID; - template<typename ...T> void operator()(T ...V) { - profileCtor(ID, NodeKind<NodeT>::Kind, V...); - } -}; - -struct ProfileNode { - FoldingSetNodeID &ID; - template<typename NodeT> void operator()(const NodeT *N) { - N->match(ProfileSpecificNode<NodeT>{ID}); - } -}; - -template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { - llvm_unreachable("should never canonicalize a ForwardTemplateReference"); -} - -void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { - N->visit(ProfileNode{ID}); -} - -class FoldingNodeAllocator { - class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode { - public: - // 'Node' in this context names the injected-class-name of the base class. - itanium_demangle::Node *getNode() { - return reinterpret_cast<itanium_demangle::Node *>(this + 1); - } - void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); } - }; - - BumpPtrAllocator RawAlloc; - llvm::FoldingSet<NodeHeader> Nodes; - -public: - void reset() {} - - template <typename T, typename... Args> - std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) { - // FIXME: Don't canonicalize forward template references for now, because - // they contain state (the resolved template node) that's not known at their - // point of creation. - if (std::is_same<T, ForwardTemplateReference>::value) { - // Note that we don't use if-constexpr here and so we must still write - // this code in a generic form. 
- return {new (RawAlloc.Allocate(sizeof(T), alignof(T))) - T(std::forward<Args>(As)...), - true}; - } - - llvm::FoldingSetNodeID ID; - profileCtor(ID, NodeKind<T>::Kind, As...); - - void *InsertPos; - if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) - return {static_cast<T*>(Existing->getNode()), false}; - - if (!CreateNewNodes) - return {nullptr, true}; - - static_assert(alignof(T) <= alignof(NodeHeader), - "underaligned node header for specific node kind"); - void *Storage = - RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader)); - NodeHeader *New = new (Storage) NodeHeader; - T *Result = new (New->getNode()) T(std::forward<Args>(As)...); - Nodes.InsertNode(New, InsertPos); - return {Result, true}; - } - - template<typename T, typename... Args> - Node *makeNode(Args &&...As) { - return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; - } - - void *allocateNodeArray(size_t sz) { - return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *)); - } -}; - -class CanonicalizerAllocator : public FoldingNodeAllocator { - Node *MostRecentlyCreated = nullptr; - Node *TrackedNode = nullptr; - bool TrackedNodeIsUsed = false; - bool CreateNewNodes = true; - llvm::SmallDenseMap<Node*, Node*, 32> Remappings; - - template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { - std::pair<Node *, bool> Result = - getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); - if (Result.second) { - // Node is new. Make a note of that. - MostRecentlyCreated = Result.first; - } else if (Result.first) { - // Node is pre-existing; check if it's in our remapping table. - if (auto *N = Remappings.lookup(Result.first)) { - Result.first = N; - assert(Remappings.find(Result.first) == Remappings.end() && - "should never need multiple remap steps"); - } - if (Result.first == TrackedNode) - TrackedNodeIsUsed = true; - } - return Result.first; - } - - /// Helper to allow makeNode to be partially-specialized on T. 
- template<typename T> struct MakeNodeImpl { - CanonicalizerAllocator &Self; - template<typename ...Args> Node *make(Args &&...As) { - return Self.makeNodeSimple<T>(std::forward<Args>(As)...); - } - }; - -public: - template<typename T, typename ...Args> Node *makeNode(Args &&...As) { - return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); - } - - void reset() { MostRecentlyCreated = nullptr; } - - void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } - - void addRemapping(Node *A, Node *B) { - // Note, we don't need to check whether B is also remapped, because if it - // was we would have already remapped it when building it. - Remappings.insert(std::make_pair(A, B)); - } - - bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } - - void trackUsesOf(Node *N) { - TrackedNode = N; - TrackedNodeIsUsed = false; - } - bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } -}; - -// FIXME: Also expand built-in substitutions? - -using CanonicalizingDemangler = - itanium_demangle::ManglingParser<CanonicalizerAllocator>; -} // namespace - -struct ItaniumManglingCanonicalizer::Impl { - CanonicalizingDemangler Demangler = {nullptr, nullptr}; -}; - -ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} -ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } - -ItaniumManglingCanonicalizer::EquivalenceError -ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, - StringRef Second) { - auto &Alloc = P->Demangler.ASTAllocator; - Alloc.setCreateNewNodes(true); - - auto Parse = [&](StringRef Str) { - P->Demangler.reset(Str.begin(), Str.end()); - Node *N = nullptr; - switch (Kind) { - // A <name>, with minor extensions to allow arbitrary namespace and - // template names that can't easily be written as <name>s. - case FragmentKind::Name: - // Very special case: allow "St" as a shorthand for "3std". 
It's not - // valid as a <name> mangling, but is nonetheless the most natural - // way to name the 'std' namespace. - if (Str.size() == 2 && P->Demangler.consumeIf("St")) - N = P->Demangler.make<itanium_demangle::NameType>("std"); - // We permit substitutions to name templates without their template - // arguments. This mostly just falls out, as almost all template names - // are valid as <name>s, but we also want to parse <substitution>s as - // <name>s, even though they're not. - else if (Str.startswith("S")) - // Parse the substitution and optional following template arguments. - N = P->Demangler.parseType(); - else - N = P->Demangler.parseName(); - break; - - // A <type>. - case FragmentKind::Type: - N = P->Demangler.parseType(); - break; - - // An <encoding>. - case FragmentKind::Encoding: - N = P->Demangler.parseEncoding(); - break; - } - - // If we have trailing junk, the mangling is invalid. - if (P->Demangler.numLeft() != 0) - N = nullptr; - - // If any node was created after N, then we cannot safely remap it because - // it might already be in use by another node. - return std::make_pair(N, Alloc.isMostRecentlyCreated(N)); - }; - - Node *FirstNode, *SecondNode; - bool FirstIsNew, SecondIsNew; - - std::tie(FirstNode, FirstIsNew) = Parse(First); - if (!FirstNode) - return EquivalenceError::InvalidFirstMangling; - - Alloc.trackUsesOf(FirstNode); - std::tie(SecondNode, SecondIsNew) = Parse(Second); - if (!SecondNode) - return EquivalenceError::InvalidSecondMangling; - - // If they're already equivalent, there's nothing to do. 
- if (FirstNode == SecondNode) - return EquivalenceError::Success; - - if (FirstIsNew && !Alloc.trackedNodeIsUsed()) - Alloc.addRemapping(FirstNode, SecondNode); - else if (SecondIsNew) - Alloc.addRemapping(SecondNode, FirstNode); - else - return EquivalenceError::ManglingAlreadyUsed; - - return EquivalenceError::Success; -} - -static ItaniumManglingCanonicalizer::Key -parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling, - bool CreateNewNodes) { - Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes); - Demangler.reset(Mangling.begin(), Mangling.end()); - // Attempt demangling only for names that look like C++ mangled names. - // Otherwise, treat them as extern "C" names. We permit the latter to - // be remapped by (eg) - // encoding 6memcpy 7memmove - // consistent with how they are encoded as local-names inside a C++ mangling. - Node *N; - if (Mangling.startswith("_Z") || Mangling.startswith("__Z") || - Mangling.startswith("___Z") || Mangling.startswith("____Z")) - N = Demangler.parse(); - else - N = Demangler.make<itanium_demangle::NameType>( - StringView(Mangling.data(), Mangling.size())); - return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N); -} - -ItaniumManglingCanonicalizer::Key -ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { - return parseMaybeMangledName(P->Demangler, Mangling, true); -} - -ItaniumManglingCanonicalizer::Key -ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { - return parseMaybeMangledName(P->Demangler, Mangling, false); -} diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index 0e7f7bf1d999..c672a43b033e 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -8,12 +8,14 @@ #include "llvm/Support/JSON.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" #include 
"llvm/Support/NativeFormatting.h" +#include "llvm/Support/raw_ostream.h" #include <cctype> +#include <cerrno> #include <optional> namespace llvm { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 745c46fb6ffb..097c22d33dd1 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -105,7 +105,7 @@ KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const { KnownBits KnownBits::makeGE(const APInt &Val) const { // Count the number of leading bit positions where our underlying value is // known to be less than or equal to Val. - unsigned N = (Zero | Val).countLeadingOnes(); + unsigned N = (Zero | Val).countl_one(); // For each of those bit positions, if Val has a 1 in that bit then our // underlying value must also have a 1. @@ -129,7 +129,7 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { // are common to these two values are also known in the result. KnownBits L = LHS.makeGE(RHS.getMinValue()); KnownBits R = RHS.makeGE(LHS.getMinValue()); - return KnownBits::commonBits(L, R); + return L.intersectWith(R); } KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { @@ -164,169 +164,189 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { return Flip(umax(Flip(LHS), Flip(RHS))); } -KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) { +static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) { + if (isPowerOf2_32(BitWidth)) + return MaxValue.extractBitsAsZExtValue(Log2_32(BitWidth), 0); + // This is only an approximate upper bound. 
+ return MaxValue.getLimitedValue(BitWidth - 1); +} + +KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW, + bool NSW, bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known; + bool ShiftedOutZero, ShiftedOutOne; + Known.Zero = LHS.Zero.ushl_ov(ShiftAmt, ShiftedOutZero); + Known.Zero.setLowBits(ShiftAmt); + Known.One = LHS.One.ushl_ov(ShiftAmt, ShiftedOutOne); + + // All cases returning poison have been handled by MaxShiftAmount already. + if (NSW) { + if (NUW && ShiftAmt != 0) + // NUW means we can assume anything shifted out was a zero. + ShiftedOutZero = true; + + if (ShiftedOutZero) + Known.makeNonNegative(); + else if (ShiftedOutOne) + Known.makeNegative(); + } + return Known; + }; - // If the shift amount is a valid constant then transform LHS directly. - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero <<= Shift; - Known.One <<= Shift; - // Low bits are known zero. - Known.Zero.setLowBits(Shift); + // Fast path for a common case when LHS is completely unknown. + KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + Known.Zero.setLowBits(MinShiftAmount); + if (NUW && NSW && MinShiftAmount != 0) + Known.makeNonNegative(); return Known; } - // No matter the shift amount, the trailing zeros will stay zero. - unsigned MinTrailingZeros = LHS.countMinTrailingZeros(); - - // Minimum shift amount low bits are known zero. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - MinTrailingZeros += MinShiftAmount.getZExtValue(); - MinTrailingZeros = std::min(MinTrailingZeros, BitWidth); + // Determine maximum shift amount, taking NUW/NSW flags into account. 
+ APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + if (NUW && NSW) + MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros() - 1); + if (NUW) + MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros()); + if (NSW) + MaxShiftAmount = std::min( + MaxShiftAmount, + std::max(LHS.countMaxLeadingZeros(), LHS.countMaxLeadingOnes()) - 1); + + // Fast path for common case where the shift amount is unknown. + if (MinShiftAmount == 0 && MaxShiftAmount == BitWidth - 1 && + isPowerOf2_32(BitWidth)) { + Known.Zero.setLowBits(LHS.countMinTrailingZeros()); + if (LHS.isAllOnes()) + Known.One.setSignBit(); + if (NSW) { + if (LHS.isNonNegative()) + Known.makeNonNegative(); + if (LHS.isNegative()) + Known.makeNegative(); + } + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. - APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift; - SpecificShift.Zero = LHS.Zero << ShiftAmt; - SpecificShift.One = LHS.One << ShiftAmt; - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. 
+ unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setLowBits(MinTrailingZeros); + // All shift amounts may result in poison. + if (Known.hasConflict()) + Known.setAllZero(); return Known; } -KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS, + bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); - - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero.lshrInPlace(Shift); - Known.One.lshrInPlace(Shift); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known = LHS; + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); // High bits are known zero. - Known.Zero.setHighBits(Shift); + Known.Zero.setHighBits(ShiftAmt); return Known; - } - - // No matter the shift amount, the leading zeros will stay zero. - unsigned MinLeadingZeros = LHS.countMinLeadingZeros(); + }; - // Minimum shift amount high bits are known zero. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - MinLeadingZeros += MinShiftAmount.getZExtValue(); - MinLeadingZeros = std::min(MinLeadingZeros, BitWidth); + // Fast path for a common case when LHS is completely unknown. 
+ KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + Known.Zero.setHighBits(MinShiftAmount); + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. - APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift = LHS; - SpecificShift.Zero.lshrInPlace(ShiftAmt); - SpecificShift.One.lshrInPlace(ShiftAmt); - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. + APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setHighBits(MinLeadingZeros); + // All shift amounts may result in poison. 
+ if (Known.hasConflict()) + Known.setAllZero(); return Known; } -KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS, + bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); - - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero.ashrInPlace(Shift); - Known.One.ashrInPlace(Shift); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known = LHS; + Known.Zero.ashrInPlace(ShiftAmt); + Known.One.ashrInPlace(ShiftAmt); return Known; - } - - // No matter the shift amount, the leading sign bits will stay. - unsigned MinLeadingZeros = LHS.countMinLeadingZeros(); - unsigned MinLeadingOnes = LHS.countMinLeadingOnes(); + }; - // Minimum shift amount high bits are known sign bits. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - if (MinLeadingZeros) { - MinLeadingZeros += MinShiftAmount.getZExtValue(); - MinLeadingZeros = std::min(MinLeadingZeros, BitWidth); - } - if (MinLeadingOnes) { - MinLeadingOnes += MinShiftAmount.getZExtValue(); - MinLeadingOnes = std::min(MinLeadingOnes, BitWidth); + // Fast path for a common case when LHS is completely unknown. + KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + if (MinShiftAmount == BitWidth) { + // Always poison. Return zero because we don't like returning conflict. + Known.setAllZero(); + return Known; } + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. 
- APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift = LHS; - SpecificShift.Zero.ashrInPlace(ShiftAmt); - SpecificShift.One.ashrInPlace(ShiftAmt); - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. + APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setHighBits(MinLeadingZeros); - Known.One.setHighBits(MinLeadingOnes); + // All shift amounts may result in poison. + if (Known.hasConflict()) + Known.setAllZero(); return Known; } @@ -399,19 +419,219 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const { // Absolute value preserves trailing zero count. 
KnownBits KnownAbs(getBitWidth()); - KnownAbs.Zero.setLowBits(countMinTrailingZeros()); - // We only know that the absolute values's MSB will be zero if INT_MIN is - // poison, or there is a set bit that isn't the sign bit (otherwise it could - // be INT_MIN). - if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue())) - KnownAbs.Zero.setSignBit(); + // If the input is negative, then abs(x) == -x. + if (isNegative()) { + KnownBits Tmp = *this; + // Special case for IntMinIsPoison. We know the sign bit is set and we know + // all the rest of the bits except one to be zero. Since we have + // IntMinIsPoison, that final bit MUST be a one, as otherwise the input is + // INT_MIN. + if (IntMinIsPoison && (Zero.popcount() + 2) == getBitWidth()) + Tmp.One.setBit(countMinTrailingZeros()); + + KnownAbs = computeForAddSub( + /*Add*/ false, IntMinIsPoison, + KnownBits::makeConstant(APInt(getBitWidth(), 0)), Tmp); + + // One more special case for IntMinIsPoison. If we don't know any ones other + // than the signbit, we know for certain that all the unknowns can't be + // zero. So if we know high zero bits, but have unknown low bits, we know + // for certain those high-zero bits will end up as one. This is because, + // the low bits can't be all zeros, so the +1 in (~x + 1) cannot carry up + // to the high bits. If we know a known INT_MIN input skip this. The result + // is poison anyways. + if (IntMinIsPoison && Tmp.countMinPopulation() == 1 && + Tmp.countMaxPopulation() != 1) { + Tmp.One.clearSignBit(); + Tmp.Zero.setSignBit(); + KnownAbs.One.setBits(getBitWidth() - Tmp.countMinLeadingZeros(), + getBitWidth() - 1); + } + + } else { + unsigned MaxTZ = countMaxTrailingZeros(); + unsigned MinTZ = countMinTrailingZeros(); + + KnownAbs.Zero.setLowBits(MinTZ); + // If we know the lowest set 1, then preserve it. 
+ if (MaxTZ == MinTZ && MaxTZ < getBitWidth()) + KnownAbs.One.setBit(MaxTZ); + + // We only know that the absolute values's MSB will be zero if INT_MIN is + // poison, or there is a set bit that isn't the sign bit (otherwise it could + // be INT_MIN). + if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue())) { + KnownAbs.One.clearSignBit(); + KnownAbs.Zero.setSignBit(); + } + } - // FIXME: Handle known negative input? - // FIXME: Calculate the negated Known bits and combine them? + assert(!KnownAbs.hasConflict() && "Bad Output"); return KnownAbs; } +static KnownBits computeForSatAddSub(bool Add, bool Signed, + const KnownBits &LHS, + const KnownBits &RHS) { + assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs"); + // We don't see NSW even for sadd/ssub as we want to check if the result has + // signed overflow. + KnownBits Res = KnownBits::computeForAddSub(Add, /*NSW*/ false, LHS, RHS); + unsigned BitWidth = Res.getBitWidth(); + auto SignBitKnown = [&](const KnownBits &K) { + return K.Zero[BitWidth - 1] || K.One[BitWidth - 1]; + }; + std::optional<bool> Overflow; + + if (Signed) { + // If we can actually detect overflow do so. Otherwise leave Overflow as + // nullopt (we assume it may have happened). 
+ if (SignBitKnown(LHS) && SignBitKnown(RHS) && SignBitKnown(Res)) { + if (Add) { + // sadd.sat + Overflow = (LHS.isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()); + } else { + // ssub.sat + Overflow = (LHS.isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()); + } + } + } else if (Add) { + // uadd.sat + bool Of; + (void)LHS.getMaxValue().uadd_ov(RHS.getMaxValue(), Of); + if (!Of) { + Overflow = false; + } else { + (void)LHS.getMinValue().uadd_ov(RHS.getMinValue(), Of); + if (Of) + Overflow = true; + } + } else { + // usub.sat + bool Of; + (void)LHS.getMinValue().usub_ov(RHS.getMaxValue(), Of); + if (!Of) { + Overflow = false; + } else { + (void)LHS.getMaxValue().usub_ov(RHS.getMinValue(), Of); + if (Of) + Overflow = true; + } + } + + if (Signed) { + if (Add) { + if (LHS.isNonNegative() && RHS.isNonNegative()) { + // Pos + Pos -> Pos + Res.One.clearSignBit(); + Res.Zero.setSignBit(); + } + if (LHS.isNegative() && RHS.isNegative()) { + // Neg + Neg -> Neg + Res.One.setSignBit(); + Res.Zero.clearSignBit(); + } + } else { + if (LHS.isNegative() && RHS.isNonNegative()) { + // Neg - Pos -> Neg + Res.One.setSignBit(); + Res.Zero.clearSignBit(); + } else if (LHS.isNonNegative() && RHS.isNegative()) { + // Pos - Neg -> Pos + Res.One.clearSignBit(); + Res.Zero.setSignBit(); + } + } + } else { + // Add: Leading ones of either operand are preserved. + // Sub: Leading zeros of LHS and leading ones of RHS are preserved + // as leading zeros in the result. + unsigned LeadingKnown; + if (Add) + LeadingKnown = + std::max(LHS.countMinLeadingOnes(), RHS.countMinLeadingOnes()); + else + LeadingKnown = + std::max(LHS.countMinLeadingZeros(), RHS.countMinLeadingOnes()); + + // We select between the operation result and all-ones/zero + // respectively, so we can preserve known ones/zeros. 
+ APInt Mask = APInt::getHighBitsSet(BitWidth, LeadingKnown);
+ if (Add) {
+ Res.One |= Mask;
+ Res.Zero &= ~Mask;
+ } else {
+ Res.Zero |= Mask;
+ Res.One &= ~Mask;
+ }
+ }
+
+ if (Overflow) {
+ // We know whether or not we overflowed.
+ if (!(*Overflow)) {
+ // No overflow.
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We overflowed
+ APInt C;
+ if (Signed) {
+ // sadd.sat / ssub.sat
+ assert(SignBitKnown(LHS) &&
+ "We somehow know overflow without knowing input sign");
+ C = LHS.isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+ } else if (Add) {
+ // uadd.sat
+ C = APInt::getMaxValue(BitWidth);
+ } else {
+ // usub.sat
+ C = APInt::getMinValue(BitWidth);
+ }
+
+ Res.One = C;
+ Res.Zero = ~C;
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We don't know if we overflowed.
+ if (Signed) {
+ // sadd.sat/ssub.sat
+ // We can keep our information about the sign bits.
+ Res.Zero.clearLowBits(BitWidth - 1);
+ Res.One.clearLowBits(BitWidth - 1);
+ } else if (Add) {
+ // uadd.sat
+ // We need to clear all the known zeros as we can only use the leading ones.
+ Res.Zero.clearAllBits();
+ } else {
+ // usub.sat
+ // We need to clear all the known ones as we can only use the leading zeros. 
+ Res.One.clearAllBits(); + } + + assert(!Res.hasConflict() && "Bad Output"); + return Res; +} + +KnownBits KnownBits::sadd_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ true, /*Signed*/ true, LHS, RHS); +} +KnownBits KnownBits::ssub_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ false, /*Signed*/ true, LHS, RHS); +} +KnownBits KnownBits::uadd_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ true, /*Signed*/ false, LHS, RHS); +} +KnownBits KnownBits::usub_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ false, /*Signed*/ false, LHS, RHS); +} + KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply) { unsigned BitWidth = LHS.getBitWidth(); @@ -432,7 +652,7 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, // fit in the bitwidth (it must not overflow). bool HasOverflow; APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow); - unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countLeadingZeros(); + unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countl_zero(); // The result of the bottom bits of an integer multiply can be // inferred by looking at the bottom bits of both operands and @@ -481,8 +701,8 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, // How many times we'd be able to divide each argument by 2 (shr by 1). // This gives us the number of trailing zeros on the multiplication result. 
- unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes(); - unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes(); + unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countr_one(); + unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countr_one(); unsigned TrailZero0 = LHS.countMinTrailingZeros(); unsigned TrailZero1 = RHS.countMinTrailingZeros(); unsigned TrailZ = TrailZero0 + TrailZero1; @@ -528,34 +748,151 @@ KnownBits KnownBits::mulhu(const KnownBits &LHS, const KnownBits &RHS) { return mul(WideLHS, WideRHS).extractBits(BitWidth, BitWidth); } -KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) { +static KnownBits divComputeLowBit(KnownBits Known, const KnownBits &LHS, + const KnownBits &RHS, bool Exact) { + + if (!Exact) + return Known; + + // If LHS is Odd, the result is Odd no matter what. + // Odd / Odd -> Odd + // Odd / Even -> Impossible (because its exact division) + if (LHS.One[0]) + Known.One.setBit(0); + + int MinTZ = + (int)LHS.countMinTrailingZeros() - (int)RHS.countMaxTrailingZeros(); + int MaxTZ = + (int)LHS.countMaxTrailingZeros() - (int)RHS.countMinTrailingZeros(); + if (MinTZ >= 0) { + // Result has at least MinTZ trailing zeros. + Known.Zero.setLowBits(MinTZ); + if (MinTZ == MaxTZ) { + // Result has exactly MinTZ trailing zeros. + Known.One.setBit(MinTZ); + } + } else if (MaxTZ < 0) { + // Poison Result + Known.setAllZero(); + } + + // In the KnownBits exhaustive tests, we have poison inputs for exact values + // a LOT. If we have a conflict, just return all zeros. + if (Known.hasConflict()) + Known.setAllZero(); + + return Known; +} + +KnownBits KnownBits::sdiv(const KnownBits &LHS, const KnownBits &RHS, + bool Exact) { + // Equivalent of `udiv`. We must have caught this before it was folded. 
+ if (LHS.isNonNegative() && RHS.isNonNegative()) + return udiv(LHS, RHS, Exact); + + unsigned BitWidth = LHS.getBitWidth(); + assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs"); + KnownBits Known(BitWidth); + + if (LHS.isZero() || RHS.isZero()) { + // Result is either known Zero or UB. Return Zero either way. + // Checking this earlier saves us a lot of special cases later on. + Known.setAllZero(); + return Known; + } + + std::optional<APInt> Res; + if (LHS.isNegative() && RHS.isNegative()) { + // Result non-negative. + APInt Denom = RHS.getSignedMaxValue(); + APInt Num = LHS.getSignedMinValue(); + // INT_MIN/-1 would be a poison result (impossible). Estimate the division + // as signed max (we will only set sign bit in the result). + Res = (Num.isMinSignedValue() && Denom.isAllOnes()) + ? APInt::getSignedMaxValue(BitWidth) + : Num.sdiv(Denom); + } else if (LHS.isNegative() && RHS.isNonNegative()) { + // Result is negative if Exact OR -LHS u>= RHS. + if (Exact || (-LHS.getSignedMaxValue()).uge(RHS.getSignedMaxValue())) { + APInt Denom = RHS.getSignedMinValue(); + APInt Num = LHS.getSignedMinValue(); + Res = Denom.isZero() ? Num : Num.sdiv(Denom); + } + } else if (LHS.isStrictlyPositive() && RHS.isNegative()) { + // Result is negative if Exact OR LHS u>= -RHS. 
+ if (Exact || LHS.getSignedMinValue().uge(-RHS.getSignedMinValue())) { + APInt Denom = RHS.getSignedMaxValue(); + APInt Num = LHS.getSignedMaxValue(); + Res = Num.sdiv(Denom); + } + } + + if (Res) { + if (Res->isNonNegative()) { + unsigned LeadZ = Res->countLeadingZeros(); + Known.Zero.setHighBits(LeadZ); + } else { + unsigned LeadO = Res->countLeadingOnes(); + Known.One.setHighBits(LeadO); + } + } + + Known = divComputeLowBit(Known, LHS, RHS, Exact); + + assert(!Known.hasConflict() && "Bad Output"); + return Known; +} + +KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS, + bool Exact) { unsigned BitWidth = LHS.getBitWidth(); assert(!LHS.hasConflict() && !RHS.hasConflict()); KnownBits Known(BitWidth); - // For the purposes of computing leading zeros we can conservatively - // treat a udiv as a logical right shift by the power of 2 known to - // be less than the denominator. - unsigned LeadZ = LHS.countMinLeadingZeros(); - unsigned RHSMaxLeadingZeros = RHS.countMaxLeadingZeros(); + if (LHS.isZero() || RHS.isZero()) { + // Result is either known Zero or UB. Return Zero either way. + // Checking this earlier saves us a lot of special cases later on. + Known.setAllZero(); + return Known; + } - if (RHSMaxLeadingZeros != BitWidth) - LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); + // We can figure out the minimum number of upper zero bits by doing + // MaxNumerator / MinDenominator. If the Numerator gets smaller or Denominator + // gets larger, the number of upper zero bits increases. + APInt MinDenom = RHS.getMinValue(); + APInt MaxNum = LHS.getMaxValue(); + APInt MaxRes = MinDenom.isZero() ? 
MaxNum : MaxNum.udiv(MinDenom); + + unsigned LeadZ = MaxRes.countLeadingZeros(); Known.Zero.setHighBits(LeadZ); + Known = divComputeLowBit(Known, LHS, RHS, Exact); + + assert(!Known.hasConflict() && "Bad Output"); return Known; } -KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::remGetLowBits(const KnownBits &LHS, const KnownBits &RHS) { unsigned BitWidth = LHS.getBitWidth(); + if (!RHS.isZero() && RHS.Zero[0]) { + // rem X, Y where Y[0:N] is zero will preserve X[0:N] in the result. + unsigned RHSZeros = RHS.countMinTrailingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSZeros); + APInt OnesMask = LHS.One & Mask; + APInt ZerosMask = LHS.Zero & Mask; + return KnownBits(ZerosMask, OnesMask); + } + return KnownBits(BitWidth); +} + +KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The upper bits are all zero, the lower ones are unchanged. - APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero | ~LowBits; - Known.One = LHS.One & LowBits; + // NB: Low bits set in `remGetLowBits`. + APInt HighBits = ~(RHS.getConstant() - 1); + Known.Zero |= HighBits; return Known; } @@ -568,16 +905,12 @@ KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { } KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) { - unsigned BitWidth = LHS.getBitWidth(); assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The low bits of the first operand are unchanged by the srem. + // NB: Low bits are set in `remGetLowBits`. 
APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero & LowBits; - Known.One = LHS.One & LowBits; - // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero)) @@ -623,8 +956,40 @@ KnownBits &KnownBits::operator^=(const KnownBits &RHS) { return *this; } +KnownBits KnownBits::blsi() const { + unsigned BitWidth = getBitWidth(); + KnownBits Known(Zero, APInt(BitWidth, 0)); + unsigned Max = countMaxTrailingZeros(); + Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth)); + unsigned Min = countMinTrailingZeros(); + if (Max == Min && Max < BitWidth) + Known.One.setBit(Max); + return Known; +} + +KnownBits KnownBits::blsmsk() const { + unsigned BitWidth = getBitWidth(); + KnownBits Known(BitWidth); + unsigned Max = countMaxTrailingZeros(); + Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth)); + unsigned Min = countMinTrailingZeros(); + Known.One.setLowBits(std::min(Min + 1, BitWidth)); + return Known; +} + void KnownBits::print(raw_ostream &OS) const { - OS << "{Zero=" << Zero << ", One=" << One << "}"; + unsigned BitWidth = getBitWidth(); + for (unsigned I = 0; I < BitWidth; ++I) { + unsigned N = BitWidth - I - 1; + if (Zero[N] && One[N]) + OS << "!"; + else if (Zero[N]) + OS << "0"; + else if (One[N]) + OS << "1"; + else + OS << "?"; + } } void KnownBits::dump() const { print(dbgs()); diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp deleted file mode 100644 index 0282cd9bd79e..000000000000 --- a/llvm/lib/Support/LowLevelType.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- llvm/Support/LowLevelType.cpp -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file implements the more header-heavy bits of the LLT class to -/// avoid polluting users' namespaces. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -LLT::LLT(MVT VT) { - if (VT.isVector()) { - bool asVector = VT.getVectorMinNumElements() > 1; - init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector, - VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(), - /*AddressSpace=*/0); - } else if (VT.isValid()) { - // Aggregates are no different from real scalars as far as GlobalISel is - // concerned. - init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true, - ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0); - } else { - IsScalar = false; - IsPointer = false; - IsVector = false; - RawData = 0; - } -} - -void LLT::print(raw_ostream &OS) const { - if (isVector()) { - OS << "<"; - OS << getElementCount() << " x " << getElementType() << ">"; - } else if (isPointer()) - OS << "p" << getAddressSpace(); - else if (isValid()) { - assert(isScalar() && "unexpected type"); - OS << "s" << getScalarSizeInBits(); - } else - OS << "LLT_invalid"; -} - -const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo 
LLT::PointerVectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo; diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 0bb11725d2fc..4cc4fe019b75 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/Alignment.h" @@ -22,6 +23,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" +#include <algorithm> #include <cassert> #include <cstring> #include <new> @@ -132,10 +134,13 @@ MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { - auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); + auto Buf = + WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); if (!Buf) return make_error_code(errc::not_enough_memory); - memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); + // Calling memcpy with null src/dst is UB, and an empty StringRef is + // represented with {nullptr, 0}. 
+ llvm::copy(InputData, Buf->getBufferStart()); return std::move(Buf); } diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp index 6e8137c405b8..3b9273e1eaad 100644 --- a/llvm/lib/Support/NativeFormatting.cpp +++ b/llvm/lib/Support/NativeFormatting.cpp @@ -58,10 +58,7 @@ static void write_unsigned_impl(raw_ostream &S, T N, size_t MinDigits, static_assert(std::is_unsigned_v<T>, "Value is not unsigned!"); char NumberBuffer[128]; - std::memset(NumberBuffer, '0', sizeof(NumberBuffer)); - - size_t Len = 0; - Len = format_to_buffer(N, NumberBuffer); + size_t Len = format_to_buffer(N, NumberBuffer); if (IsNegative) S << '-'; diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp new file mode 100644 index 000000000000..04d50cc70d91 --- /dev/null +++ b/llvm/lib/Support/PGOOptions.cpp @@ -0,0 +1,58 @@ +//===------ PGOOptions.cpp -- PGO option tunables --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace llvm; + +PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + std::string MemoryProfile, + IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action, + CSPGOAction CSAction, bool DebugInfoForProfiling, + bool PseudoProbeForProfiling) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile), + Action(Action), CSAction(CSAction), + DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) { + // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can + // callback with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // Cannot optimize with MemProf profile during IR instrumentation. + assert(this->MemoryProfile.empty() || this->Action != PGOOptions::IRInstr); + + // If neither Action nor CSAction nor MemoryProfile are set, + // DebugInfoForProfiling or PseudoProbeForProfiling needs to be true. 
+ assert(this->Action != NoAction || this->CSAction != NoCSAction || + !this->MemoryProfile.empty() || this->DebugInfoForProfiling || + this->PseudoProbeForProfiling); + + // If we need to use the profile, the VFS cannot be nullptr. + assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse || + !this->MemoryProfile.empty())); +} + +PGOOptions::PGOOptions(const PGOOptions &) = default; + +PGOOptions &PGOOptions::operator=(const PGOOptions &O) = default; + +PGOOptions::~PGOOptions() = default; diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 23ed9d813548..9b14b05b5211 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -12,8 +12,8 @@ #include "llvm/Support/Threading.h" #include <atomic> +#include <deque> #include <future> -#include <stack> #include <thread> #include <vector> @@ -24,11 +24,11 @@ namespace parallel { #if LLVM_ENABLE_THREADS #ifdef _WIN32 -static thread_local unsigned threadIndex; +static thread_local unsigned threadIndex = UINT_MAX; -unsigned getThreadIndex() { return threadIndex; } +unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; } #else -thread_local unsigned threadIndex; +thread_local unsigned threadIndex = UINT_MAX; #endif namespace detail { @@ -39,7 +39,8 @@ namespace { class Executor { public: virtual ~Executor() = default; - virtual void add(std::function<void()> func) = 0; + virtual void add(std::function<void()> func, bool Sequential = false) = 0; + virtual size_t getThreadCount() const = 0; static Executor *getDefaultExecutor(); }; @@ -49,13 +50,16 @@ public: class ThreadPoolExecutor : public Executor { public: explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) { - unsigned ThreadCount = S.compute_thread_count(); + ThreadCount = S.compute_thread_count(); // Spawn all but one of the threads in another thread as spawning threads // can take a while. 
Threads.reserve(ThreadCount); Threads.resize(1); std::lock_guard<std::mutex> Lock(Mutex); - Threads[0] = std::thread([this, ThreadCount, S] { + // Use operator[] before creating the thread to avoid data race in .size() + // in “safe libc++” mode. + auto &Thread0 = Threads[0]; + Thread0 = std::thread([this, S] { for (unsigned I = 1; I < ThreadCount; ++I) { Threads.emplace_back([=] { work(S, I); }); if (Stop) @@ -94,36 +98,61 @@ public: static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); } }; - void add(std::function<void()> F) override { + void add(std::function<void()> F, bool Sequential = false) override { { std::lock_guard<std::mutex> Lock(Mutex); - WorkStack.push(std::move(F)); + if (Sequential) + WorkQueueSequential.emplace_front(std::move(F)); + else + WorkQueue.emplace_back(std::move(F)); } Cond.notify_one(); } + size_t getThreadCount() const override { return ThreadCount; } + private: + bool hasSequentialTasks() const { + return !WorkQueueSequential.empty() && !SequentialQueueIsLocked; + } + + bool hasGeneralTasks() const { return !WorkQueue.empty(); } + void work(ThreadPoolStrategy S, unsigned ThreadID) { threadIndex = ThreadID; S.apply_thread_strategy(ThreadID); while (true) { std::unique_lock<std::mutex> Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + Cond.wait(Lock, [&] { + return Stop || hasGeneralTasks() || hasSequentialTasks(); + }); if (Stop) break; - auto Task = std::move(WorkStack.top()); - WorkStack.pop(); + bool Sequential = hasSequentialTasks(); + if (Sequential) + SequentialQueueIsLocked = true; + else + assert(hasGeneralTasks()); + + auto &Queue = Sequential ? 
WorkQueueSequential : WorkQueue; + auto Task = std::move(Queue.back()); + Queue.pop_back(); Lock.unlock(); Task(); + if (Sequential) + SequentialQueueIsLocked = false; } } std::atomic<bool> Stop{false}; - std::stack<std::function<void()>> WorkStack; + std::atomic<bool> SequentialQueueIsLocked{false}; + std::deque<std::function<void()>> WorkQueue; + std::deque<std::function<void()>> WorkQueueSequential; std::mutex Mutex; std::condition_variable Cond; std::promise<void> ThreadsCreated; std::vector<std::thread> Threads; + unsigned ThreadCount; }; Executor *Executor::getDefaultExecutor() { @@ -153,54 +182,53 @@ Executor *Executor::getDefaultExecutor() { } } // namespace } // namespace detail -#endif -static std::atomic<int> TaskGroupInstances; +size_t getThreadCount() { + return detail::Executor::getDefaultExecutor()->getThreadCount(); +} +#endif // Latch::sync() called by the dtor may cause one thread to block. If is a dead // lock if all threads in the default executor are blocked. To prevent the dead -// lock, only allow the first TaskGroup to run tasks parallelly. In the scenario +// lock, only allow the root TaskGroup to run tasks parallelly. In the scenario // of nested parallel_for_each(), only the outermost one runs parallelly. -TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {} +TaskGroup::TaskGroup() +#if LLVM_ENABLE_THREADS + : Parallel((parallel::strategy.ThreadsRequested != 1) && + (threadIndex == UINT_MAX)) {} +#else + : Parallel(false) {} +#endif TaskGroup::~TaskGroup() { // We must ensure that all the workloads have finished before decrementing the // instances count. 
L.sync(); - --TaskGroupInstances; } -void TaskGroup::spawn(std::function<void()> F) { +void TaskGroup::spawn(std::function<void()> F, bool Sequential) { #if LLVM_ENABLE_THREADS if (Parallel) { L.inc(); - detail::Executor::getDefaultExecutor()->add([&, F = std::move(F)] { - F(); - L.dec(); - }); + detail::Executor::getDefaultExecutor()->add( + [&, F = std::move(F)] { + F(); + L.dec(); + }, + Sequential); return; } #endif F(); } -void TaskGroup::execute(std::function<void()> F) { - if (parallel::strategy.ThreadsRequested == 1) - F(); - else - spawn(F); -} } // namespace parallel } // namespace llvm void llvm::parallelFor(size_t Begin, size_t End, llvm::function_ref<void(size_t)> Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. #if LLVM_ENABLE_THREADS - auto NumItems = End - Begin; - if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) { + if (parallel::strategy.ThreadsRequested != 1) { + auto NumItems = End - Begin; // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling // overhead on large inputs. 
auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup; @@ -214,8 +242,12 @@ void llvm::parallelFor(size_t Begin, size_t End, Fn(I); }); } - for (; Begin != End; ++Begin) - Fn(Begin); + if (Begin != End) { + TG.spawn([=, &Fn] { + for (size_t I = Begin; I != End; ++I) + Fn(I); + }); + } return; } #endif diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 152d902f52e6..7a57c104ef10 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Endian.h" @@ -22,6 +23,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include <cctype> +#include <cerrno> #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> @@ -1202,18 +1204,10 @@ Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl<char> &Buffer, #include "Windows/Path.inc" #endif -bool IsLLVMDriver = false; - namespace llvm { namespace sys { namespace fs { -std::string getMainExecutable(const char *Argv0, void *MainAddr) { - if (IsLLVMDriver) - return sys::path::stem(Argv0).str(); - return getMainExecutableImpl(Argv0, MainAddr); -} - TempFile::TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {} TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); } diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index fa91405fee10..f9f1b8a419b8 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -64,8 +64,7 @@ static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr; // the current thread". If the user happens to overflow an 'unsigned' with // SIGINFO requests, it's possible that some threads will stop responding to it, // but the program won't crash. 
-static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = - ATOMIC_VAR_INIT(1); +static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = 1; static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0; namespace llvm { diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 1b1bff023d2f..70fab8010831 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -1,4 +1,4 @@ -//===-- RISCVISAInfo.cpp - RISCV Arch String Parser -------------*- C++ -*-===// +//===-- RISCVISAInfo.cpp - RISC-V Arch String Parser ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,6 +16,7 @@ #include "llvm/Support/raw_ostream.h" #include <array> +#include <atomic> #include <optional> #include <string> #include <vector> @@ -33,107 +34,189 @@ struct RISCVSupportedExtension { const char *Name; /// Supported version. RISCVExtensionVersion Version; + + bool operator<(const RISCVSupportedExtension &RHS) const { + return StringRef(Name) < StringRef(RHS.Name); + } }; } // end anonymous namespace static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh"; +static const char *RISCVGImplications[] = { + "i", "m", "a", "f", "d", "zicsr", "zifencei" +}; + +// NOTE: This table should be sorted alphabetically by extension name. 
static const RISCVSupportedExtension SupportedExtensions[] = { - {"i", RISCVExtensionVersion{2, 0}}, - {"e", RISCVExtensionVersion{1, 9}}, - {"m", RISCVExtensionVersion{2, 0}}, - {"a", RISCVExtensionVersion{2, 0}}, - {"f", RISCVExtensionVersion{2, 0}}, - {"d", RISCVExtensionVersion{2, 0}}, + {"a", RISCVExtensionVersion{2, 1}}, {"c", RISCVExtensionVersion{2, 0}}, - + {"d", RISCVExtensionVersion{2, 2}}, + {"e", RISCVExtensionVersion{2, 0}}, + {"f", RISCVExtensionVersion{2, 2}}, {"h", RISCVExtensionVersion{1, 0}}, + {"i", RISCVExtensionVersion{2, 1}}, + {"m", RISCVExtensionVersion{2, 0}}, - {"zihintpause", RISCVExtensionVersion{2, 0}}, + {"svinval", RISCVExtensionVersion{1, 0}}, + {"svnapot", RISCVExtensionVersion{1, 0}}, + {"svpbmt", RISCVExtensionVersion{1, 0}}, - {"zfhmin", RISCVExtensionVersion{1, 0}}, - {"zfh", RISCVExtensionVersion{1, 0}}, + {"v", RISCVExtensionVersion{1, 0}}, - {"zfinx", RISCVExtensionVersion{1, 0}}, - {"zdinx", RISCVExtensionVersion{1, 0}}, - {"zhinxmin", RISCVExtensionVersion{1, 0}}, - {"zhinx", RISCVExtensionVersion{1, 0}}, + // vendor-defined ('X') extensions + {"xcvbitmanip", RISCVExtensionVersion{1, 0}}, + {"xcvmac", RISCVExtensionVersion{1, 0}}, + {"xsfcie", RISCVExtensionVersion{1, 0}}, + {"xsfvcp", RISCVExtensionVersion{1, 0}}, + {"xtheadba", RISCVExtensionVersion{1, 0}}, + {"xtheadbb", RISCVExtensionVersion{1, 0}}, + {"xtheadbs", RISCVExtensionVersion{1, 0}}, + {"xtheadcmo", RISCVExtensionVersion{1, 0}}, + {"xtheadcondmov", RISCVExtensionVersion{1, 0}}, + {"xtheadfmemidx", RISCVExtensionVersion{1, 0}}, + {"xtheadmac", RISCVExtensionVersion{1, 0}}, + {"xtheadmemidx", RISCVExtensionVersion{1, 0}}, + {"xtheadmempair", RISCVExtensionVersion{1, 0}}, + {"xtheadsync", RISCVExtensionVersion{1, 0}}, + {"xtheadvdot", RISCVExtensionVersion{1, 0}}, + {"xventanacondops", RISCVExtensionVersion{1, 0}}, + + {"zawrs", RISCVExtensionVersion{1, 0}}, {"zba", RISCVExtensionVersion{1, 0}}, {"zbb", RISCVExtensionVersion{1, 0}}, {"zbc", 
RISCVExtensionVersion{1, 0}}, - {"zbs", RISCVExtensionVersion{1, 0}}, - {"zbkb", RISCVExtensionVersion{1, 0}}, {"zbkc", RISCVExtensionVersion{1, 0}}, {"zbkx", RISCVExtensionVersion{1, 0}}, + {"zbs", RISCVExtensionVersion{1, 0}}, + + {"zca", RISCVExtensionVersion{1, 0}}, + {"zcb", RISCVExtensionVersion{1, 0}}, + {"zcd", RISCVExtensionVersion{1, 0}}, + {"zce", RISCVExtensionVersion{1, 0}}, + {"zcf", RISCVExtensionVersion{1, 0}}, + {"zcmp", RISCVExtensionVersion{1, 0}}, + {"zcmt", RISCVExtensionVersion{1, 0}}, + + {"zdinx", RISCVExtensionVersion{1, 0}}, + + {"zfh", RISCVExtensionVersion{1, 0}}, + {"zfhmin", RISCVExtensionVersion{1, 0}}, + {"zfinx", RISCVExtensionVersion{1, 0}}, + + {"zhinx", RISCVExtensionVersion{1, 0}}, + {"zhinxmin", RISCVExtensionVersion{1, 0}}, + + {"zicbom", RISCVExtensionVersion{1, 0}}, + {"zicbop", RISCVExtensionVersion{1, 0}}, + {"zicboz", RISCVExtensionVersion{1, 0}}, + {"zicntr", RISCVExtensionVersion{1, 0}}, + {"zicsr", RISCVExtensionVersion{2, 0}}, + {"zifencei", RISCVExtensionVersion{2, 0}}, + {"zihintpause", RISCVExtensionVersion{2, 0}}, + {"zihpm", RISCVExtensionVersion{1, 0}}, + + {"zk", RISCVExtensionVersion{1, 0}}, + {"zkn", RISCVExtensionVersion{1, 0}}, {"zknd", RISCVExtensionVersion{1, 0}}, {"zkne", RISCVExtensionVersion{1, 0}}, {"zknh", RISCVExtensionVersion{1, 0}}, - {"zksed", RISCVExtensionVersion{1, 0}}, - {"zksh", RISCVExtensionVersion{1, 0}}, {"zkr", RISCVExtensionVersion{1, 0}}, - {"zkn", RISCVExtensionVersion{1, 0}}, {"zks", RISCVExtensionVersion{1, 0}}, + {"zksed", RISCVExtensionVersion{1, 0}}, + {"zksh", RISCVExtensionVersion{1, 0}}, {"zkt", RISCVExtensionVersion{1, 0}}, - {"zk", RISCVExtensionVersion{1, 0}}, {"zmmul", RISCVExtensionVersion{1, 0}}, - {"v", RISCVExtensionVersion{1, 0}}, - {"zvl32b", RISCVExtensionVersion{1, 0}}, - {"zvl64b", RISCVExtensionVersion{1, 0}}, - {"zvl128b", RISCVExtensionVersion{1, 0}}, - {"zvl256b", RISCVExtensionVersion{1, 0}}, - {"zvl512b", RISCVExtensionVersion{1, 0}}, - {"zvl1024b", 
RISCVExtensionVersion{1, 0}}, - {"zvl2048b", RISCVExtensionVersion{1, 0}}, - {"zvl4096b", RISCVExtensionVersion{1, 0}}, - {"zvl8192b", RISCVExtensionVersion{1, 0}}, - {"zvl16384b", RISCVExtensionVersion{1, 0}}, - {"zvl32768b", RISCVExtensionVersion{1, 0}}, - {"zvl65536b", RISCVExtensionVersion{1, 0}}, - {"zve32x", RISCVExtensionVersion{1, 0}}, {"zve32f", RISCVExtensionVersion{1, 0}}, - {"zve64x", RISCVExtensionVersion{1, 0}}, - {"zve64f", RISCVExtensionVersion{1, 0}}, + {"zve32x", RISCVExtensionVersion{1, 0}}, {"zve64d", RISCVExtensionVersion{1, 0}}, + {"zve64f", RISCVExtensionVersion{1, 0}}, + {"zve64x", RISCVExtensionVersion{1, 0}}, - {"zicbom", RISCVExtensionVersion{1, 0}}, - {"zicboz", RISCVExtensionVersion{1, 0}}, - {"zicbop", RISCVExtensionVersion{1, 0}}, + {"zvfh", RISCVExtensionVersion{1, 0}}, - {"svnapot", RISCVExtensionVersion{1, 0}}, - {"svpbmt", RISCVExtensionVersion{1, 0}}, - {"svinval", RISCVExtensionVersion{1, 0}}, - {"xventanacondops", RISCVExtensionVersion{1, 0}}, - {"xtheadvdot", RISCVExtensionVersion{1, 0}}, + {"zvl1024b", RISCVExtensionVersion{1, 0}}, + {"zvl128b", RISCVExtensionVersion{1, 0}}, + {"zvl16384b", RISCVExtensionVersion{1, 0}}, + {"zvl2048b", RISCVExtensionVersion{1, 0}}, + {"zvl256b", RISCVExtensionVersion{1, 0}}, + {"zvl32768b", RISCVExtensionVersion{1, 0}}, + {"zvl32b", RISCVExtensionVersion{1, 0}}, + {"zvl4096b", RISCVExtensionVersion{1, 0}}, + {"zvl512b", RISCVExtensionVersion{1, 0}}, + {"zvl64b", RISCVExtensionVersion{1, 0}}, + {"zvl65536b", RISCVExtensionVersion{1, 0}}, + {"zvl8192b", RISCVExtensionVersion{1, 0}}, }; +// NOTE: This table should be sorted alphabetically by extension name. 
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { + {"smaia", RISCVExtensionVersion{1, 0}}, + {"ssaia", RISCVExtensionVersion{1, 0}}, + + {"zacas", RISCVExtensionVersion{1, 0}}, + + {"zfa", RISCVExtensionVersion{0, 2}}, + {"zfbfmin", RISCVExtensionVersion{0, 6}}, + + {"zicond", RISCVExtensionVersion{1, 0}}, + {"zihintntl", RISCVExtensionVersion{0, 2}}, - {"zca", RISCVExtensionVersion{0, 70}}, - {"zcd", RISCVExtensionVersion{0, 70}}, - {"zcf", RISCVExtensionVersion{0, 70}}, - {"zvfh", RISCVExtensionVersion{0, 1}}, - {"zawrs", RISCVExtensionVersion{1, 0}}, {"ztso", RISCVExtensionVersion{0, 1}}, + + {"zvbb", RISCVExtensionVersion{1, 0}}, + {"zvbc", RISCVExtensionVersion{1, 0}}, + + {"zvfbfmin", RISCVExtensionVersion{0, 6}}, + {"zvfbfwma", RISCVExtensionVersion{0, 6}}, + + // vector crypto + {"zvkg", RISCVExtensionVersion{1, 0}}, + {"zvkn", RISCVExtensionVersion{1, 0}}, + {"zvknc", RISCVExtensionVersion{1, 0}}, + {"zvkned", RISCVExtensionVersion{1, 0}}, + {"zvkng", RISCVExtensionVersion{1, 0}}, + {"zvknha", RISCVExtensionVersion{1, 0}}, + {"zvknhb", RISCVExtensionVersion{1, 0}}, + {"zvks", RISCVExtensionVersion{1, 0}}, + {"zvksc", RISCVExtensionVersion{1, 0}}, + {"zvksed", RISCVExtensionVersion{1, 0}}, + {"zvksg", RISCVExtensionVersion{1, 0}}, + {"zvksh", RISCVExtensionVersion{1, 0}}, + {"zvkt", RISCVExtensionVersion{1, 0}}, }; +static void verifyTables() { +#ifndef NDEBUG + static std::atomic<bool> TableChecked(false); + if (!TableChecked.load(std::memory_order_relaxed)) { + assert(llvm::is_sorted(SupportedExtensions) && + "Extensions are not sorted by name"); + assert(llvm::is_sorted(SupportedExperimentalExtensions) && + "Experimental extensions are not sorted by name"); + TableChecked.store(true, std::memory_order_relaxed); + } +#endif +} + static bool stripExperimentalPrefix(StringRef &Ext) { return Ext.consume_front("experimental-"); } -// This function finds the first character that doesn't belong to a version +// This function finds the 
last character that doesn't belong to a version // (e.g. zba1p0 is extension 'zba' of version '1p0'). So the function will // consume [0-9]*p[0-9]* starting from the backward. An extension name will not // end with a digit or the letter 'p', so this function will parse correctly. // NOTE: This function is NOT able to take empty strings or strings that only // have version numbers and no extension name. It assumes the extension name // will be at least more than one character. -static size_t findFirstNonVersionCharacter(StringRef Ext) { +static size_t findLastNonVersionCharacter(StringRef Ext) { assert(!Ext.empty() && "Already guarded by if-statement in ::parseArchString"); @@ -149,11 +232,12 @@ static size_t findFirstNonVersionCharacter(StringRef Ext) { } namespace { -struct FindByName { - FindByName(StringRef Ext) : Ext(Ext){}; - StringRef Ext; - bool operator()(const RISCVSupportedExtension &ExtInfo) { - return ExtInfo.Name == Ext; +struct LessExtName { + bool operator()(const RISCVSupportedExtension &LHS, StringRef RHS) { + return StringRef(LHS.Name) < RHS; + } + bool operator()(StringRef LHS, const RISCVSupportedExtension &RHS) { + return LHS < StringRef(RHS.Name); } }; } // namespace @@ -164,12 +248,12 @@ findDefaultVersion(StringRef ExtName) { // TODO: We might set default version based on profile or ISA spec. 
for (auto &ExtInfo : {ArrayRef(SupportedExtensions), ArrayRef(SupportedExperimentalExtensions)}) { - auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName)); + auto I = llvm::lower_bound(ExtInfo, ExtName, LessExtName()); - if (ExtensionInfoIterator == ExtInfo.end()) { + if (I == ExtInfo.end() || I->Name != ExtName) continue; - } - return ExtensionInfoIterator->Version; + + return I->Version; } return std::nullopt; } @@ -177,15 +261,12 @@ findDefaultVersion(StringRef ExtName) { void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion, unsigned MinorVersion) { RISCVExtensionInfo Ext; - Ext.ExtName = ExtName.str(); Ext.MajorVersion = MajorVersion; Ext.MinorVersion = MinorVersion; Exts[ExtName.str()] = Ext; } static StringRef getExtensionTypeDesc(StringRef Ext) { - if (Ext.startswith("sx")) - return "non-standard supervisor-level extension"; if (Ext.startswith("s")) return "standard supervisor-level extension"; if (Ext.startswith("x")) @@ -196,8 +277,6 @@ static StringRef getExtensionTypeDesc(StringRef Ext) { } static StringRef getExtensionType(StringRef Ext) { - if (Ext.startswith("sx")) - return "sx"; if (Ext.startswith("s")) return "s"; if (Ext.startswith("x")) @@ -209,36 +288,50 @@ static StringRef getExtensionType(StringRef Ext) { static std::optional<RISCVExtensionVersion> isExperimentalExtension(StringRef Ext) { - auto ExtIterator = - llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext)); - if (ExtIterator == std::end(SupportedExperimentalExtensions)) + auto I = + llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName()); + if (I == std::end(SupportedExperimentalExtensions) || I->Name != Ext) return std::nullopt; - return ExtIterator->Version; + return I->Version; } bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) { bool IsExperimental = stripExperimentalPrefix(Ext); - if (IsExperimental) - return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); - else - return 
llvm::any_of(SupportedExtensions, FindByName(Ext)); + ArrayRef<RISCVSupportedExtension> ExtInfo = + IsExperimental ? ArrayRef(SupportedExperimentalExtensions) + : ArrayRef(SupportedExtensions); + + auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); + return I != ExtInfo.end() && I->Name == Ext; } bool RISCVISAInfo::isSupportedExtension(StringRef Ext) { - return llvm::any_of(SupportedExtensions, FindByName(Ext)) || - llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); + verifyTables(); + + for (auto ExtInfo : {ArrayRef(SupportedExtensions), + ArrayRef(SupportedExperimentalExtensions)}) { + auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); + if (I != ExtInfo.end() && I->Name == Ext) + return true; + } + + return false; } bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion, unsigned MinorVersion) { - auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) { - return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) && - (MinorVersion == ExtInfo.Version.Minor); - }; - return llvm::any_of(SupportedExtensions, FindByNameAndVersion) || - llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion); + for (auto ExtInfo : {ArrayRef(SupportedExtensions), + ArrayRef(SupportedExperimentalExtensions)}) { + auto Range = + std::equal_range(ExtInfo.begin(), ExtInfo.end(), Ext, LessExtName()); + for (auto I = Range.first, E = Range.second; I != E; ++I) + if (I->Version.Major == MajorVersion && I->Version.Minor == MinorVersion) + return true; + } + + return false; } bool RISCVISAInfo::hasExtension(StringRef Ext) const { @@ -250,78 +343,71 @@ bool RISCVISAInfo::hasExtension(StringRef Ext) const { return Exts.count(Ext.str()) != 0; } +// We rank extensions in the following order: +// -Single letter extensions in canonical order. +// -Unknown single letter extensions in alphabetical order. 
+// -Multi-letter extensions starting with 'z' sorted by canonical order of +// the second letter then sorted alphabetically. +// -Multi-letter extensions starting with 's' in alphabetical order. +// -(TODO) Multi-letter extensions starting with 'zxm' in alphabetical order. +// -X extensions in alphabetical order. +// These flags are used to indicate the category. The first 6 bits store the +// single letter extension rank for single letter and multi-letter extensions +// starting with 'z'. +enum RankFlags { + RF_Z_EXTENSION = 1 << 6, + RF_S_EXTENSION = 1 << 7, + RF_X_EXTENSION = 1 << 8, +}; + // Get the rank for single-letter extension, lower value meaning higher // priority. -static int singleLetterExtensionRank(char Ext) { +static unsigned singleLetterExtensionRank(char Ext) { + assert(Ext >= 'a' && Ext <= 'z'); switch (Ext) { case 'i': - return -2; + return 0; case 'e': - return -1; - default: - break; + return 1; } size_t Pos = AllStdExts.find(Ext); - int Rank; - if (Pos == StringRef::npos) - // If we got an unknown extension letter, then give it an alphabetical - // order, but after all known standard extensions. - Rank = AllStdExts.size() + (Ext - 'a'); - else - Rank = Pos; + if (Pos != StringRef::npos) + return Pos + 2; // Skip 'e' and 'i' from above. - return Rank; + // If we got an unknown extension letter, then give it an alphabetical + // order, but after all known standard extensions. + return 2 + AllStdExts.size() + (Ext - 'a'); } // Get the rank for multi-letter extension, lower value meaning higher // priority/order in canonical order. -static int multiLetterExtensionRank(const std::string &ExtName) { - assert(ExtName.length() >= 2); - int HighOrder; - int LowOrder = 0; - // The order between multi-char extensions: s -> h -> z -> x. 
- char ExtClass = ExtName[0]; - switch (ExtClass) { +static unsigned getExtensionRank(const std::string &ExtName) { + assert(ExtName.size() >= 1); + switch (ExtName[0]) { case 's': - HighOrder = 0; - break; + return RF_S_EXTENSION; case 'z': - HighOrder = 1; + assert(ExtName.size() >= 2); // `z` extension must be sorted by canonical order of second letter. // e.g. zmx has higher rank than zax. - LowOrder = singleLetterExtensionRank(ExtName[1]); - break; + return RF_Z_EXTENSION | singleLetterExtensionRank(ExtName[1]); case 'x': - HighOrder = 2; - break; + return RF_X_EXTENSION; default: - llvm_unreachable("Unknown prefix for multi-char extension"); - return -1; + assert(ExtName.size() == 1); + return singleLetterExtensionRank(ExtName[0]); } - - return (HighOrder << 8) + LowOrder; } // Compare function for extension. // Only compare the extension name, ignore version comparison. bool RISCVISAInfo::compareExtension(const std::string &LHS, const std::string &RHS) { - size_t LHSLen = LHS.length(); - size_t RHSLen = RHS.length(); - if (LHSLen == 1 && RHSLen != 1) - return true; + unsigned LHSRank = getExtensionRank(LHS); + unsigned RHSRank = getExtensionRank(RHS); - if (LHSLen != 1 && RHSLen == 1) - return false; - - if (LHSLen == 1 && RHSLen == 1) - return singleLetterExtensionRank(LHS[0]) < - singleLetterExtensionRank(RHS[0]); - - // Both are multi-char ext here. - int LHSRank = multiLetterExtensionRank(LHS); - int RHSRank = multiLetterExtensionRank(RHS); + // If the ranks differ, pick the lower rank. if (LHSRank != RHSRank) return LHSRank < RHSRank; @@ -485,11 +571,12 @@ RISCVISAInfo::parseFeatures(unsigned XLen, ? ArrayRef(SupportedExperimentalExtensions) : ArrayRef(SupportedExtensions); auto ExtensionInfoIterator = - llvm::find_if(ExtensionInfos, FindByName(ExtName)); + llvm::lower_bound(ExtensionInfos, ExtName, LessExtName()); // Not all features is related to ISA extension, like `relax` or // `save-restore`, skip those feature. 
- if (ExtensionInfoIterator == ExtensionInfos.end()) + if (ExtensionInfoIterator == ExtensionInfos.end() || + ExtensionInfoIterator->Name != ExtName) continue; if (Add) @@ -503,6 +590,67 @@ RISCVISAInfo::parseFeatures(unsigned XLen, } llvm::Expected<std::unique_ptr<RISCVISAInfo>> +RISCVISAInfo::parseNormalizedArchString(StringRef Arch) { + if (llvm::any_of(Arch, isupper)) { + return createStringError(errc::invalid_argument, + "string must be lowercase"); + } + // Must start with a valid base ISA name. + unsigned XLen; + if (Arch.startswith("rv32i") || Arch.startswith("rv32e")) + XLen = 32; + else if (Arch.startswith("rv64i") || Arch.startswith("rv64e")) + XLen = 64; + else + return createStringError(errc::invalid_argument, + "arch string must begin with valid base ISA"); + std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen)); + // Discard rv32/rv64 prefix. + Arch = Arch.substr(4); + + // Each extension is of the form ${name}${major_version}p${minor_version} + // and separated by _. Split by _ and then extract the name and version + // information for each extension. + SmallVector<StringRef, 8> Split; + Arch.split(Split, '_'); + for (StringRef Ext : Split) { + StringRef Prefix, MinorVersionStr; + std::tie(Prefix, MinorVersionStr) = Ext.rsplit('p'); + if (MinorVersionStr.empty()) + return createStringError(errc::invalid_argument, + "extension lacks version in expected format"); + unsigned MajorVersion, MinorVersion; + if (MinorVersionStr.getAsInteger(10, MinorVersion)) + return createStringError(errc::invalid_argument, + "failed to parse minor version number"); + + // Split Prefix into the extension name and the major version number + // (the trailing digits of Prefix). 
+ int TrailingDigits = 0; + StringRef ExtName = Prefix; + while (!ExtName.empty()) { + if (!isDigit(ExtName.back())) + break; + ExtName = ExtName.drop_back(1); + TrailingDigits++; + } + if (!TrailingDigits) + return createStringError(errc::invalid_argument, + "extension lacks version in expected format"); + + StringRef MajorVersionStr = Prefix.take_back(TrailingDigits); + if (MajorVersionStr.getAsInteger(10, MajorVersion)) + return createStringError(errc::invalid_argument, + "failed to parse major version number"); + ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion); + } + ISAInfo->updateFLen(); + ISAInfo->updateMinVLen(); + ISAInfo->updateMaxELen(); + return std::move(ISAInfo); +} + +llvm::Expected<std::unique_ptr<RISCVISAInfo>> RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool ExperimentalExtensionVersionCheck, bool IgnoreUnknown) { @@ -515,8 +663,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool HasRV64 = Arch.startswith("rv64"); // ISA string must begin with rv32 or rv64. if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) { - return createStringError(errc::invalid_argument, - "string must begin with rv32{i,e,g} or rv64{i,g}"); + return createStringError( + errc::invalid_argument, + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } unsigned XLen = HasRV64 ? 64 : 32; @@ -532,27 +681,27 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, default: return createStringError(errc::invalid_argument, "first letter should be 'e', 'i' or 'g'"); - case 'e': { - // Extension 'e' is not allowed in rv64. - if (HasRV64) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - break; - } + case 'e': case 'i': break; case 'g': - // g = imafd + // g expands to extensions in RISCVGImplications. 
+ if (Arch.size() > 5 && isDigit(Arch[5])) + return createStringError(errc::invalid_argument, + "version not supported for 'g'"); StdExts = StdExts.drop_front(4); break; } + if (Arch.back() == '_') + return createStringError(errc::invalid_argument, + "extension name missing after separator '_'"); + // Skip rvxxx StringRef Exts = Arch.substr(5); // Remove multi-letter standard extensions, non-standard extensions and - // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes. + // supervisor-level extensions. They have 'z', 'x', 's' prefixes. // Parse them at the end. // Find the very first occurrence of 's', 'x' or 'z'. StringRef OtherExts; @@ -563,36 +712,48 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } unsigned Major, Minor, ConsumeLength; - if (auto E = getExtensionVersion(std::string(1, Baseline), Exts, Major, Minor, - ConsumeLength, EnableExperimentalExtension, - ExperimentalExtensionVersionCheck)) - return std::move(E); - if (Baseline == 'g') { + // Versions for g are disallowed, and this was checked for previously. + ConsumeLength = 0; + // No matter which version is given to `g`, we always set imafd to default // version since the we don't have clear version scheme for that on // ISA spec. - for (const auto *Ext : {"i", "m", "a", "f", "d"}) + for (const auto *Ext : RISCVGImplications) { if (auto Version = findDefaultVersion(Ext)) ISAInfo->addExtension(Ext, Version->Major, Version->Minor); else llvm_unreachable("Default extension version not found?"); - } else + } + } else { // Baseline is `i` or `e` - ISAInfo->addExtension(std::string(1, Baseline), Major, Minor); + if (auto E = getExtensionVersion( + StringRef(&Baseline, 1), Exts, Major, Minor, ConsumeLength, + EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) { + if (!IgnoreUnknown) + return std::move(E); + // If IgnoreUnknown, then ignore an unrecognised version of the baseline + // ISA and just use the default supported version. 
+ consumeError(std::move(E)); + auto Version = findDefaultVersion(StringRef(&Baseline, 1)); + Major = Version->Major; + Minor = Version->Minor; + } + + ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor); + } // Consume the base ISA version number and any '_' between rvxxx and the // first extension Exts = Exts.drop_front(ConsumeLength); Exts.consume_front("_"); - // TODO: Use version number when setting target features - auto StdExtsItr = StdExts.begin(); auto StdExtsEnd = StdExts.end(); - auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength) { + auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength, + StringRef::iterator E) { I += 1 + ConsumeLength; - if (*I == '_') + if (I != E && *I == '_') ++I; }; for (auto I = Exts.begin(), E = Exts.end(); I != E;) { @@ -619,38 +780,37 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // Move to next char to prevent repeated letter. ++StdExtsItr; - std::string Next; + StringRef Next; unsigned Major, Minor, ConsumeLength; if (std::next(I) != E) - Next = std::string(std::next(I), E); - if (auto E = getExtensionVersion(std::string(1, C), Next, Major, Minor, + Next = StringRef(std::next(I), E - std::next(I)); + if (auto E = getExtensionVersion(StringRef(&C, 1), Next, Major, Minor, ConsumeLength, EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) { if (IgnoreUnknown) { consumeError(std::move(E)); - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); continue; } return std::move(E); } // The order is OK, then push it into features. - // TODO: Use version number when setting target features // Currently LLVM supports only "mafdcvh". 
if (!isSupportedExtension(StringRef(&C, 1))) { if (IgnoreUnknown) { - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); continue; } return createStringError(errc::invalid_argument, "unsupported standard user-level extension '%c'", C); } - ISAInfo->addExtension(std::string(1, C), Major, Minor); + ISAInfo->addExtension(StringRef(&C, 1), Major, Minor); // Consume full extension name and version, including any optional '_' // between this extension and the next - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); } // Handle other types of extensions other than the standard @@ -658,9 +818,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // Parse the ISA string containing non-standard user-level // extensions, standard supervisor-level extensions and // non-standard supervisor-level extensions. - // These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a - // canonical order, might have a version number (major, minor) - // and are separated by a single underscore '_'. + // These extensions start with 'z', 's', 'x' prefixes, might have a version + // number (major, minor) and are separated by a single underscore '_'. We do + // not enforce a canonical order for them. // Set the hardware features for the extensions that are supported. 
// Multi-letter extensions are seperated by a single underscore @@ -669,9 +829,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, OtherExts.split(Split, '_'); SmallVector<StringRef, 8> AllExts; - std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"}; - auto I = Prefix.begin(); - auto E = Prefix.end(); if (Split.size() > 1 || Split[0] != "") { for (StringRef Ext : Split) { if (Ext.empty()) @@ -680,7 +837,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, StringRef Type = getExtensionType(Ext); StringRef Desc = getExtensionTypeDesc(Ext); - auto Pos = findFirstNonVersionCharacter(Ext) + 1; + auto Pos = findLastNonVersionCharacter(Ext) + 1; StringRef Name(Ext.substr(0, Pos)); StringRef Vers(Ext.substr(Pos)); @@ -691,18 +848,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, "invalid extension prefix '" + Ext + "'"); } - // Check ISA extensions are specified in the canonical order. - while (I != E && *I != Type) - ++I; - - if (I == E) { - if (IgnoreUnknown) - continue; - return createStringError(errc::invalid_argument, - "%s not given in canonical order '%s'", - Desc.str().c_str(), Ext.str().c_str()); - } - if (!IgnoreUnknown && Name.size() == Type.size()) { return createStringError(errc::invalid_argument, "%s name missing after '%s'", @@ -726,6 +871,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, Desc.str().c_str(), Name.str().c_str()); } + if (IgnoreUnknown && !isSupportedExtension(Name)) + continue; + ISAInfo->addExtension(Name, Major, Minor); // Extension format is correct, keep parsing the extensions. // TODO: Save Type, Name, Major, Minor to avoid parsing them later. 
@@ -745,51 +893,55 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } Error RISCVISAInfo::checkDependency() { - bool IsRv32 = XLen == 32; - bool HasE = Exts.count("e") != 0; - bool HasD = Exts.count("d") != 0; + bool HasC = Exts.count("c") != 0; bool HasF = Exts.count("f") != 0; bool HasZfinx = Exts.count("zfinx") != 0; - bool HasZdinx = Exts.count("zdinx") != 0; bool HasVector = Exts.count("zve32x") != 0; - bool HasZve32f = Exts.count("zve32f") != 0; - bool HasZve64d = Exts.count("zve64d") != 0; bool HasZvl = MinVLen != 0; + bool HasZcmt = Exts.count("zcmt") != 0; - if (HasE && !IsRv32) + if (HasF && HasZfinx) + return createStringError(errc::invalid_argument, + "'f' and 'zfinx' extensions are incompatible"); + + if (HasZvl && !HasVector) return createStringError( errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); + "'zvl*b' requires 'v' or 'zve*' extension to also be specified"); - // It's illegal to specify the 'd' (double-precision floating point) - // extension without also specifying the 'f' (single precision - // floating-point) extension. 
- // TODO: This has been removed in later specs, which specify that D implies F - if (HasD && !HasF) - return createStringError(errc::invalid_argument, - "d requires f extension to also be specified"); + if (Exts.count("zvbb") && !HasVector) + return createStringError( + errc::invalid_argument, + "'zvbb' requires 'v' or 'zve*' extension to also be specified"); - if (HasZve32f && !HasF && !HasZfinx) + if (Exts.count("zvbc") && !Exts.count("zve64x")) return createStringError( errc::invalid_argument, - "zve32f requires f or zfinx extension to also be specified"); + "'zvbc' requires 'v' or 'zve64*' extension to also be specified"); - if (HasZve64d && !HasD && !HasZdinx) + if ((Exts.count("zvkg") || Exts.count("zvkned") || Exts.count("zvknha") || + Exts.count("zvksed") || Exts.count("zvksh")) && + !HasVector) return createStringError( errc::invalid_argument, - "zve64d requires d or zdinx extension to also be specified"); + "'zvk*' requires 'v' or 'zve*' extension to also be specified"); - if (Exts.count("zvfh") && !Exts.count("zfh") && !Exts.count("zfhmin") && - !Exts.count("zhinx") && !Exts.count("zhinxmin")) + if (Exts.count("zvknhb") && !Exts.count("zve64x")) return createStringError( errc::invalid_argument, - "zvfh requires zfh, zfhmin, zhinx or zhinxmin extension to also be " - "specified"); + "'zvknhb' requires 'v' or 'zve64*' extension to also be specified"); - if (HasZvl && !HasVector) + if ((HasZcmt || Exts.count("zcmp")) && Exts.count("d") && + (HasC || Exts.count("zcd"))) return createStringError( errc::invalid_argument, - "zvl*b requires v or zve* extension to also be specified"); + Twine("'") + (HasZcmt ? "zcmt" : "zcmp") + + "' extension is incompatible with '" + (HasC ? "c" : "zcd") + + "' extension when 'd' extension is enabled"); + + if (XLen != 32 && Exts.count("zcf")) + return createStringError(errc::invalid_argument, + "'zcf' is only supported for 'rv32'"); // Additional dependency checks. // TODO: The 'q' extension requires rv64. 
@@ -798,34 +950,58 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"}; -static const char *ImpliedExtsZfhmin[] = {"f"}; -static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsD[] = {"f"}; +static const char *ImpliedExtsF[] = {"zicsr"}; +static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"}; +static const char *ImpliedExtsXTHeadVdot[] = {"v"}; +static const char *ImpliedExtsXsfvcp[] = {"zve32x"}; +static const char *ImpliedExtsZacas[] = {"a"}; +static const char *ImpliedExtsZcb[] = {"zca"}; +static const char *ImpliedExtsZcd[] = {"zca"}; +static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; +static const char *ImpliedExtsZcf[] = {"zca"}; +static const char *ImpliedExtsZcmp[] = {"zca"}; +static const char *ImpliedExtsZcmt[] = {"zca"}; static const char *ImpliedExtsZdinx[] = {"zfinx"}; -static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZfa[] = {"f"}; +static const char *ImpliedExtsZfbfmin[] = {"f"}; +static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsZfhmin[] = {"f"}; +static const char *ImpliedExtsZfinx[] = {"zicsr"}; static const char *ImpliedExtsZhinx[] = {"zfinx"}; -static const char *ImpliedExtsZve64d[] = {"zve64f"}; +static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZicntr[] = {"zicsr"}; +static const char *ImpliedExtsZihpm[] = {"zicsr"}; +static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; +static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", + "zkne", "zknd", "zknh"}; +static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; +static const char *ImpliedExtsZve32f[] = {"zve32x", "f"}; +static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"}; +static const char *ImpliedExtsZve64d[] = {"zve64f", "d"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = 
{"zve32x", "zvl64b"}; -static const char *ImpliedExtsZve32f[] = {"zve32x"}; -static const char *ImpliedExtsZve32x[] = {"zvl32b"}; -static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; -static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; +static const char *ImpliedExtsZvfbfmin[] = {"zve32f"}; +static const char *ImpliedExtsZvfbfwma[] = {"zve32f"}; +static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"}; +static const char *ImpliedExtsZvkn[] = {"zvbb", "zvkned", "zvknhb", "zvkt"}; +static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"}; +static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"}; +static const char *ImpliedExtsZvknhb[] = {"zvknha"}; +static const char *ImpliedExtsZvks[] = {"zvbb", "zvksed", "zvksh", "zvkt"}; +static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"}; +static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"}; +static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; +static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"}; -static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; -static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; static const char *ImpliedExtsZvl2048b[] = {"zvl1024b"}; -static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; -static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl256b[] = {"zvl128b"}; -static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; +static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; +static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; +static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; -static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; -static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", - "zkne", "zknd", "zknh"}; -static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; -static const char *ImpliedExtsZvfh[] = {"zve32f"}; -static const char *ImpliedExtsXTHeadVdot[] = 
{"v"}; +static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; +static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; struct ImpliedExtsEntry { StringLiteral Name; @@ -840,13 +1016,28 @@ struct ImpliedExtsEntry { // Note: The table needs to be sorted by name. static constexpr ImpliedExtsEntry ImpliedExts[] = { + {{"d"}, {ImpliedExtsD}}, + {{"f"}, {ImpliedExtsF}}, {{"v"}, {ImpliedExtsV}}, + {{"xsfvcp"}, {ImpliedExtsXsfvcp}}, {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}}, + {{"zacas"}, {ImpliedExtsZacas}}, + {{"zcb"}, {ImpliedExtsZcb}}, + {{"zcd"}, {ImpliedExtsZcd}}, + {{"zce"}, {ImpliedExtsZce}}, + {{"zcf"}, {ImpliedExtsZcf}}, + {{"zcmp"}, {ImpliedExtsZcmp}}, + {{"zcmt"}, {ImpliedExtsZcmt}}, {{"zdinx"}, {ImpliedExtsZdinx}}, + {{"zfa"}, {ImpliedExtsZfa}}, + {{"zfbfmin"}, {ImpliedExtsZfbfmin}}, {{"zfh"}, {ImpliedExtsZfh}}, {{"zfhmin"}, {ImpliedExtsZfhmin}}, + {{"zfinx"}, {ImpliedExtsZfinx}}, {{"zhinx"}, {ImpliedExtsZhinx}}, {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, + {{"zicntr"}, {ImpliedExtsZicntr}}, + {{"zihpm"}, {ImpliedExtsZihpm}}, {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, @@ -855,7 +1046,16 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zve64d"}, {ImpliedExtsZve64d}}, {{"zve64f"}, {ImpliedExtsZve64f}}, {{"zve64x"}, {ImpliedExtsZve64x}}, + {{"zvfbfmin"}, {ImpliedExtsZvfbfmin}}, + {{"zvfbfwma"}, {ImpliedExtsZvfbfwma}}, {{"zvfh"}, {ImpliedExtsZvfh}}, + {{"zvkn"}, {ImpliedExtsZvkn}}, + {{"zvknc"}, {ImpliedExtsZvknc}}, + {{"zvkng"}, {ImpliedExtsZvkng}}, + {{"zvknhb"}, {ImpliedExtsZvknhb}}, + {{"zvks"}, {ImpliedExtsZvks}}, + {{"zvksc"}, {ImpliedExtsZvksc}}, + {{"zvksg"}, {ImpliedExtsZvksg}}, {{"zvl1024b"}, {ImpliedExtsZvl1024b}}, {{"zvl128b"}, {ImpliedExtsZvl128b}}, {{"zvl16384b"}, {ImpliedExtsZvl16384b}}, @@ -903,6 +1103,13 @@ void RISCVISAInfo::updateImplication() { } } } + + // Add Zcf if Zce and F are enabled on RV32. 
+ if (XLen == 32 && Exts.count("zce") && Exts.count("f") && + !Exts.count("zcf")) { + auto Version = findDefaultVersion("zcf"); + addExtension("zcf", Version->Major, Version->Minor); + } } struct CombinedExtsEntry { @@ -914,6 +1121,12 @@ static constexpr CombinedExtsEntry CombineIntoExts[] = { {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, + {{"zvkn"}, {ImpliedExtsZvkn}}, + {{"zvknc"}, {ImpliedExtsZvknc}}, + {{"zvkng"}, {ImpliedExtsZvkng}}, + {{"zvks"}, {ImpliedExtsZvks}}, + {{"zvksc"}, {ImpliedExtsZvksc}}, + {{"zvksg"}, {ImpliedExtsZvksg}}, }; void RISCVISAInfo::updateCombination() { @@ -999,6 +1212,8 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const { std::string ExtName = Ext.first; if (ExtName == "i") // i is not recognized in clang -cc1 continue; + if (!isSupportedExtension(ExtName)) + continue; std::string Feature = isExperimentalExtension(ExtName) ? "+experimental-" + ExtName : "+" + ExtName; @@ -1030,6 +1245,8 @@ StringRef RISCVISAInfo::computeDefaultABI() const { } else if (XLen == 64) { if (hasExtension("d")) return "lp64d"; + if (hasExtension("e")) + return "lp64e"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index 7a804a1a2297..dfbd373e4a98 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -14,14 +14,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "regex_impl.h" + #include <cassert> #include <string> -// Important this comes last because it defines "_REGEX_H_". At least on -// Darwin, if included before any header that (transitively) includes -// xlocale.h, this will cause trouble, because of missing regex-related types. 
-#include "regex_impl.h" - using namespace llvm; Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} diff --git a/llvm/lib/Support/ScaledNumber.cpp b/llvm/lib/Support/ScaledNumber.cpp index 54d4cc33410b..85d7afbea5c6 100644 --- a/llvm/lib/Support/ScaledNumber.cpp +++ b/llvm/lib/Support/ScaledNumber.cpp @@ -44,7 +44,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::multiply64(uint64_t LHS, return std::make_pair(Lower, 0); // Shift as little as possible to maximize precision. - unsigned LeadingZeros = countLeadingZeros(Upper); + unsigned LeadingZeros = llvm::countl_zero(Upper); int Shift = 64 - LeadingZeros; if (LeadingZeros) Upper = Upper << LeadingZeros | Lower >> Shift; @@ -62,7 +62,7 @@ std::pair<uint32_t, int16_t> ScaledNumbers::divide32(uint32_t Dividend, // Use 64-bit math and canonicalize the dividend to gain precision. uint64_t Dividend64 = Dividend; int Shift = 0; - if (int Zeros = countLeadingZeros(Dividend64)) { + if (int Zeros = llvm::countl_zero(Dividend64)) { Shift -= Zeros; Dividend64 <<= Zeros; } @@ -84,7 +84,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, // Minimize size of divisor. int Shift = 0; - if (int Zeros = countTrailingZeros(Divisor)) { + if (int Zeros = llvm::countr_zero(Divisor)) { Shift -= Zeros; Divisor >>= Zeros; } @@ -94,7 +94,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, return std::make_pair(Dividend, Shift); // Maximize size of dividend. 
- if (int Zeros = countLeadingZeros(Dividend)) { + if (int Zeros = llvm::countl_zero(Dividend)) { Shift -= Zeros; Dividend <<= Zeros; } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 0fb65accbf1d..64f66e0f8179 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -37,7 +37,6 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, Strings[Regexp] = LineNumber; return true; } - Trigrams.insert(Regexp); // Replace * with .* for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos; @@ -61,8 +60,6 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const { auto It = Strings.find(Query); if (It != Strings.end()) return It->second; - if (Trigrams.isDefinitelyOut(Query)) - return false; for (const auto &RegExKV : RegExes) if (RegExKV.first->match(Query)) return RegExKV.second; @@ -175,7 +172,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, StringRef Category = SplitRegexp.second; // Create this section if it has not been seen before. - if (SectionsMap.find(Section) == SectionsMap.end()) { + if (!SectionsMap.contains(Section)) { std::unique_ptr<Matcher> M = std::make_unique<Matcher>(); std::string REError; if (!M->insert(std::string(Section), LineNo, REError)) { diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp index 9b2f96fca2cd..67c05a87959c 100644 --- a/llvm/lib/Support/StringMap.cpp +++ b/llvm/lib/Support/StringMap.cpp @@ -11,8 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringMap.h" -#include "llvm/Support/DJB.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ReverseIteration.h" +#include "llvm/Support/xxhash.h" using namespace llvm; @@ -84,7 +85,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { // Hash table unallocated so far? 
if (NumBuckets == 0) init(16); - unsigned FullHashValue = djbHash(Name, 0); + unsigned FullHashValue = xxh3_64bits(Name); + if (shouldReverseIterate()) + FullHashValue = ~FullHashValue; unsigned BucketNo = FullHashValue & (NumBuckets - 1); unsigned *HashTable = getHashTable(TheTable, NumBuckets); @@ -139,7 +142,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { int StringMapImpl::FindKey(StringRef Key) const { if (NumBuckets == 0) return -1; // Really empty table? - unsigned FullHashValue = djbHash(Key, 0); + unsigned FullHashValue = xxh3_64bits(Key); + if (shouldReverseIterate()) + FullHashValue = ~FullHashValue; unsigned BucketNo = FullHashValue & (NumBuckets - 1); unsigned *HashTable = getHashTable(TheTable, NumBuckets); diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index fb93940592c7..3cce83a982c4 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -191,7 +191,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { size_t StringRef::find_insensitive(StringRef Str, size_t From) const { StringRef This = substr(From); while (This.size() >= Str.size()) { - if (This.startswith_insensitive(Str)) + if (This.starts_with_insensitive(Str)) return From; This = This.drop_front(); ++From; @@ -509,7 +509,7 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, return !Str.empty(); } -bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { +bool StringRef::consumeInteger(unsigned Radix, APInt &Result) { StringRef Str = *this; // Autosense radix if not specified. @@ -529,6 +529,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { // If it was nothing but zeroes.... 
if (Str.empty()) { Result = APInt(64, 0); + *this = Str; return false; } @@ -561,12 +562,12 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { else if (Str[0] >= 'A' && Str[0] <= 'Z') CharVal = Str[0]-'A'+10; else - return true; + break; // If the parsed value is larger than the integer radix, the string is // invalid. if (CharVal >= Radix) - return true; + break; // Add in this character. if (IsPowerOf2Radix) { @@ -581,9 +582,25 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { Str = Str.substr(1); } + // We consider the operation a failure if no characters were consumed + // successfully. + if (size() == Str.size()) + return true; + + *this = Str; return false; } +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + if (Str.consumeInteger(Radix, Result)) + return true; + + // For getAsInteger, we require the whole string to be consumed or else we + // consider it a failure. + return !Str.empty(); +} + bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { APFloat F(0.0); auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven); diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp index 0d419f12cd1d..eaa653078e09 100644 --- a/llvm/lib/Support/SuffixTree.cpp +++ b/llvm/lib/Support/SuffixTree.cpp @@ -12,12 +12,22 @@ #include "llvm/Support/SuffixTree.h" #include "llvm/Support/Allocator.h" -#include <vector> +#include "llvm/Support/Casting.h" +#include "llvm/Support/SuffixTreeNode.h" using namespace llvm; -SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { - Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); +/// \returns the number of elements in the substring associated with \p N. 
+static size_t numElementsInSubstring(const SuffixTreeNode *N) { + assert(N && "Got a null node?"); + if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N)) + if (Internal->isRoot()) + return 0; + return N->getEndIdx() - N->getStartIdx() + 1; +} + +SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) { + Root = insertRoot(); Active.Node = Root; // Keep track of the number of suffixes we have to add of the current @@ -38,39 +48,38 @@ SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { setSuffixIndices(); } -SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeNode &Parent, +SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent, unsigned StartIdx, unsigned Edge) { - assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); - - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr); + auto *N = new (LeafNodeAllocator.Allocate()) + SuffixTreeLeafNode(StartIdx, &LeafEndIdx); Parent.Children[Edge] = N; - return N; } -SuffixTreeNode *SuffixTree::insertInternalNode(SuffixTreeNode *Parent, - unsigned StartIdx, - unsigned EndIdx, unsigned Edge) { - +SuffixTreeInternalNode * +SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent, + unsigned StartIdx, unsigned EndIdx, + unsigned Edge) { assert(StartIdx <= EndIdx && "String can't start after it ends!"); - assert(!(!Parent && StartIdx != EmptyIdx) && + assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) && "Non-root internal nodes must have parents!"); - - unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); - SuffixTreeNode *N = - new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root); + auto *N = new (InternalNodeAllocator.Allocate()) + SuffixTreeInternalNode(StartIdx, EndIdx, Root); if (Parent) Parent->Children[Edge] = N; - return N; } +SuffixTreeInternalNode *SuffixTree::insertRoot() { + return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx, + SuffixTreeNode::EmptyIdx, 
/*Edge = */ 0); +} + void SuffixTree::setSuffixIndices() { // List of nodes we need to visit along with the current length of the // string. - std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; + SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; // Current node being visited. SuffixTreeNode *CurrNode = Root; @@ -81,21 +90,23 @@ void SuffixTree::setSuffixIndices() { while (!ToVisit.empty()) { std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); ToVisit.pop_back(); - CurrNode->ConcatLen = CurrNodeLen; - for (auto &ChildPair : CurrNode->Children) { - assert(ChildPair.second && "Node had a null child!"); - ToVisit.push_back( - {ChildPair.second, CurrNodeLen + ChildPair.second->size()}); - } - + // Length of the current node from the root down to here. + CurrNode->setConcatLen(CurrNodeLen); + if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) + for (auto &ChildPair : InternalNode->Children) { + assert(ChildPair.second && "Node had a null child!"); + ToVisit.push_back( + {ChildPair.second, + CurrNodeLen + numElementsInSubstring(ChildPair.second)}); + } // No children, so we are at the end of the string. - if (CurrNode->Children.size() == 0 && !CurrNode->isRoot()) - CurrNode->SuffixIdx = Str.size() - CurrNodeLen; + if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode)) + LeafNode->setSuffixIdx(Str.size() - CurrNodeLen); } } unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { - SuffixTreeNode *NeedsLink = nullptr; + SuffixTreeInternalNode *NeedsLink = nullptr; while (SuffixesToAdd > 0) { @@ -118,7 +129,7 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // The active node is an internal node, and we visited it, so it must // need a link if it doesn't have one. if (NeedsLink) { - NeedsLink->Link = Active.Node; + NeedsLink->setLink(Active.Node); NeedsLink = nullptr; } } else { @@ -126,16 +137,18 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // insert a new node. 
SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - unsigned SubstringLen = NextNode->size(); + unsigned SubstringLen = numElementsInSubstring(NextNode); // Is the current suffix we're trying to insert longer than the size of // the child we want to move to? if (Active.Len >= SubstringLen) { // If yes, then consume the characters we've seen and move to the next // node. + assert(isa<SuffixTreeInternalNode>(NextNode) && + "Expected an internal node?"); Active.Idx += SubstringLen; Active.Len -= SubstringLen; - Active.Node = NextNode; + Active.Node = cast<SuffixTreeInternalNode>(NextNode); continue; } @@ -144,12 +157,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { unsigned LastChar = Str[EndIdx]; // Is the string we're trying to insert a substring of the next node? - if (Str[NextNode->StartIdx + Active.Len] == LastChar) { + if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) { // If yes, then we're done for this step. Remember our insertion point // and move to the next end index. At this point, we have an implicit // suffix tree. if (NeedsLink && !Active.Node->isRoot()) { - NeedsLink->Link = Active.Node; + NeedsLink->setLink(Active.Node); NeedsLink = nullptr; } @@ -171,9 +184,9 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // n l // The node s from the diagram - SuffixTreeNode *SplitNode = - insertInternalNode(Active.Node, NextNode->StartIdx, - NextNode->StartIdx + Active.Len - 1, FirstChar); + SuffixTreeInternalNode *SplitNode = insertInternalNode( + Active.Node, NextNode->getStartIdx(), + NextNode->getStartIdx() + Active.Len - 1, FirstChar); // Insert the new node representing the new substring into the tree as // a child of the split node. This is the node l from the diagram. @@ -181,12 +194,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // Make the old node a child of the split node and update its start // index. This is the node n from the diagram. 
- NextNode->StartIdx += Active.Len; - SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; + NextNode->incrementStartIdx(Active.Len); + SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode; // SplitNode is an internal node, update the suffix link. if (NeedsLink) - NeedsLink->Link = SplitNode; + NeedsLink->setLink(SplitNode); NeedsLink = SplitNode; } @@ -202,9 +215,68 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { } } else { // Start the next phase at the next smallest suffix. - Active.Node = Active.Node->Link; + Active.Node = Active.Node->getLink(); } } return SuffixesToAdd; } + +void SuffixTree::RepeatedSubstringIterator::advance() { + // Clear the current state. If we're at the end of the range, then this + // is the state we want to be in. + RS = RepeatedSubstring(); + N = nullptr; + + // Each leaf node represents a repeat of a string. + SmallVector<unsigned> RepeatedSubstringStarts; + + // Continue visiting nodes until we find one which repeats more than once. + while (!InternalNodesToVisit.empty()) { + RepeatedSubstringStarts.clear(); + auto *Curr = InternalNodesToVisit.back(); + InternalNodesToVisit.pop_back(); + + // Keep track of the length of the string associated with the node. If + // it's too short, we'll quit. + unsigned Length = Curr->getConcatLen(); + + // Iterate over each child, saving internal nodes for visiting, and + // leaf nodes in LeafChildren. Internal nodes represent individual + // strings, which may repeat. + for (auto &ChildPair : Curr->Children) { + // Save all of this node's children for processing. + if (auto *InternalChild = + dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) { + InternalNodesToVisit.push_back(InternalChild); + continue; + } + + if (Length < MinLength) + continue; + + // Have an occurrence of a potentially repeated string. Save it. 
+ auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second); + RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx()); + } + + // The root never represents a repeated substring. If we're looking at + // that, then skip it. + if (Curr->isRoot()) + continue; + + // Do we have any repeated substrings? + if (RepeatedSubstringStarts.size() < 2) + continue; + + // Yes. Update the state to reflect this, and then bail out. + N = Curr; + RS.Length = Length; + for (unsigned StartIdx : RepeatedSubstringStarts) + RS.StartIndices.push_back(StartIdx); + break; + } + // At this point, either NewRS is an empty RepeatedSubstring, or it was + // set in the above loop. Similarly, N is either nullptr, or the node + // associated with NewRS. +} diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp new file mode 100644 index 000000000000..113b990fd352 --- /dev/null +++ b/llvm/lib/Support/SuffixTreeNode.cpp @@ -0,0 +1,40 @@ +//===- llvm/ADT/SuffixTreeNode.cpp - Nodes for SuffixTrees --------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines nodes for use within a SuffixTree. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SuffixTreeNode.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +unsigned SuffixTreeNode::getStartIdx() const { return StartIdx; } +void SuffixTreeNode::incrementStartIdx(unsigned Inc) { StartIdx += Inc; } +void SuffixTreeNode::setConcatLen(unsigned Len) { ConcatLen = Len; } +unsigned SuffixTreeNode::getConcatLen() const { return ConcatLen; } + +bool SuffixTreeInternalNode::isRoot() const { + return getStartIdx() == EmptyIdx; +} +unsigned SuffixTreeInternalNode::getEndIdx() const { return EndIdx; } +void SuffixTreeInternalNode::setLink(SuffixTreeInternalNode *L) { + assert(L && "Cannot set a null link?"); + Link = L; +} +SuffixTreeInternalNode *SuffixTreeInternalNode::getLink() const { return Link; } + +unsigned SuffixTreeLeafNode::getEndIdx() const { + assert(EndIdx && "EndIdx is empty?"); + return *EndIdx; +} + +unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; } +void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; } diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp deleted file mode 100644 index 0082696038e3..000000000000 --- a/llvm/lib/Support/SymbolRemappingReader.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains definitions needed for reading and applying symbol -// remapping files. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/SymbolRemappingReader.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -char SymbolRemappingParseError::ID; - -/// Load a set of name remappings from a text file. -/// -/// See the documentation at the top of the file for an explanation of -/// the expected format. -Error SymbolRemappingReader::read(MemoryBuffer &B) { - line_iterator LineIt(B, /*SkipBlanks=*/true, '#'); - - auto ReportError = [&](Twine Msg) { - return llvm::make_error<SymbolRemappingParseError>( - B.getBufferIdentifier(), LineIt.line_number(), Msg); - }; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef Line = *LineIt; - Line = Line.ltrim(' '); - // line_iterator only detects comments starting in column 1. - if (Line.startswith("#") || Line.empty()) - continue; - - SmallVector<StringRef, 4> Parts; - Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false); - - if (Parts.size() != 3) - return ReportError("Expected 'kind mangled_name mangled_name', " - "found '" + Line + "'"); - - using FK = ItaniumManglingCanonicalizer::FragmentKind; - std::optional<FK> FragmentKind = StringSwitch<std::optional<FK>>(Parts[0]) - .Case("name", FK::Name) - .Case("type", FK::Type) - .Case("encoding", FK::Encoding) - .Default(std::nullopt); - if (!FragmentKind) - return ReportError("Invalid kind, expected 'name', 'type', or 'encoding'," - " found '" + Parts[0] + "'"); - - using EE = ItaniumManglingCanonicalizer::EquivalenceError; - switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) { - case EE::Success: - break; - - case EE::ManglingAlreadyUsed: - return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' " - "have both been used in prior remappings. 
Move this " - "remapping earlier in the file."); - - case EE::InvalidFirstMangling: - return ReportError("Could not demangle '" + Parts[1] + "' " - "as a <" + Parts[0] + ">; invalid mangling?"); - - case EE::InvalidSecondMangling: - return ReportError("Could not demangle '" + Parts[2] + "' " - "as a <" + Parts[0] + ">; invalid mangling?"); - } - } - - return Error::success(); -} diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 31461e31c65c..4eef339000e1 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/llvm-config.h" #if LLVM_ENABLE_THREADS +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #else #include "llvm/Support/raw_ostream.h" @@ -43,6 +44,7 @@ void ThreadPool::grow(int requested) { while (static_cast<int>(Threads.size()) < newThreadCount) { int ThreadID = Threads.size(); Threads.emplace_back([this, ThreadID] { + set_thread_name(formatv("llvm-worker-{0}", ThreadID)); Strategy.apply_thread_strategy(ThreadID); processTasks(nullptr); }); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp index 923935bbca10..7cc7ba44cc72 100644 --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -83,6 +83,11 @@ unsigned llvm::ThreadPoolStrategy::compute_thread_count() const { // the same interface as std::thread but requests the same stack size as the // main thread (8MB) before creation. const std::optional<unsigned> llvm::thread::DefaultStackSize = 8 * 1024 * 1024; +#elif defined(_AIX) + // On AIX, the default pthread stack size limit is ~192k for 64-bit programs. + // This limit is easily reached when doing link-time thinLTO. AIX library + // developers have used 4MB, so we'll do the same. 
+const std::optional<unsigned> llvm::thread::DefaultStackSize = 4 * 1024 * 1024; #else const std::optional<unsigned> llvm::thread::DefaultStackSize; #endif diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp deleted file mode 100644 index 40a20ccc6583..000000000000 --- a/llvm/lib/Support/TrigramIndex.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// TrigramIndex implements a heuristic for SpecialCaseList that allows to -// filter out ~99% incoming queries when all regular expressions in the -// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more -// complicated, the check is defeated and it will always pass the queries to a -// full regex. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/TrigramIndex.h" -#include "llvm/ADT/StringRef.h" -#include <set> - -using namespace llvm; - -static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}"; - -static bool isAdvancedMetachar(unsigned Char) { - return strchr(RegexAdvancedMetachars, Char) != nullptr; -} - -void TrigramIndex::insert(const std::string &Regex) { - if (Defeated) return; - std::set<unsigned> Was; - unsigned Cnt = 0; - unsigned Tri = 0; - unsigned Len = 0; - bool Escaped = false; - for (unsigned Char : Regex) { - if (!Escaped) { - // Regular expressions allow escaping symbols by preceding it with '\'. - if (Char == '\\') { - Escaped = true; - continue; - } - if (isAdvancedMetachar(Char)) { - // This is a more complicated regex than we can handle here. - Defeated = true; - return; - } - if (Char == '.' 
|| Char == '*') { - Tri = 0; - Len = 0; - continue; - } - } - if (Escaped && Char >= '1' && Char <= '9') { - Defeated = true; - return; - } - // We have already handled escaping and can reset the flag. - Escaped = false; - Tri = ((Tri << 8) + Char) & 0xFFFFFF; - Len++; - if (Len < 3) - continue; - // We don't want the index to grow too much for the popular trigrams, - // as they are weak signals. It's ok to still require them for the - // rules we have already processed. It's just a small additional - // computational cost. - if (Index[Tri].size() >= 4) - continue; - Cnt++; - if (!Was.count(Tri)) { - // Adding the current rule to the index. - Index[Tri].push_back(Counts.size()); - Was.insert(Tri); - } - } - if (!Cnt) { - // This rule does not have remarkable trigrams to rely on. - // We have to always call the full regex chain. - Defeated = true; - return; - } - Counts.push_back(Cnt); -} - -bool TrigramIndex::isDefinitelyOut(StringRef Query) const { - if (Defeated) - return false; - std::vector<unsigned> CurCounts(Counts.size()); - unsigned Tri = 0; - for (size_t I = 0; I < Query.size(); I++) { - Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF; - if (I < 2) - continue; - const auto &II = Index.find(Tri); - if (II == Index.end()) - continue; - for (size_t J : II->second) { - CurCounts[J]++; - // If we have reached a desired limit, we have to look at the query - // more closely by running a full regex. - if (CurCounts[J] >= Counts[J]) - return false; - } - } - return true; -} diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index 3efcad4f2bed..e2aece49cbc5 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -190,7 +190,7 @@ static char *getprogpath(char ret[PATH_MAX], const char *bin) { /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. 
-std::string getMainExecutableImpl(const char *argv0, void *MainAddr) { +std::string getMainExecutable(const char *argv0, void *MainAddr) { #if defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. Reading it // from there is much faster than calling dladdr, especially for large diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 05a7335216f4..fcf5701afcfd 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -62,6 +62,9 @@ #if HAVE_MACH_MACH_H #include <mach/mach.h> #endif +#ifdef __APPLE__ +#include <mach-o/dyld.h> +#endif #if HAVE_LINK_H #include <link.h> #endif @@ -84,13 +87,11 @@ static void InfoSignalHandler(int Sig); // defined below. using SignalHandlerFunctionType = void (*)(); /// The function to call if ctrl-c is pressed. -static std::atomic<SignalHandlerFunctionType> InterruptFunction = - ATOMIC_VAR_INIT(nullptr); -static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = - ATOMIC_VAR_INIT(nullptr); +static std::atomic<SignalHandlerFunctionType> InterruptFunction = nullptr; +static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = nullptr; /// The function to call on SIGPIPE (one-time use only). static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction = - ATOMIC_VAR_INIT(nullptr); + nullptr; namespace { /// Signal-safe removal of files. @@ -98,8 +99,8 @@ namespace { /// themselves is signal-safe. Memory is freed when the head is freed, deletion /// is therefore not signal-safe either. class FileToRemoveList { - std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr); - std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr); + std::atomic<char *> Filename = nullptr; + std::atomic<FileToRemoveList *> Next = nullptr; FileToRemoveList() = default; // Not signal-safe. 
@@ -188,7 +189,7 @@ public: Head.exchange(OldHead); } }; -static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr); +static std::atomic<FileToRemoveList *> FilesToRemove = nullptr; /// Clean up the list in a signal-friendly manner. /// Recall that signals can fire during llvm_shutdown. If this occurs we should @@ -248,7 +249,7 @@ static const int InfoSigs[] = {SIGUSR1 static const size_t NumSigs = std::size(IntSigs) + std::size(KillSigs) + std::size(InfoSigs) + 1 /* SIGPIPE */; -static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0); +static std::atomic<unsigned> NumRegisteredSignals = 0; static struct { struct sigaction SA; int SigNo; @@ -463,7 +464,7 @@ void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr, RegisterHandlers(); } -#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \ +#if ENABLE_BACKTRACES && defined(HAVE_BACKTRACE) && HAVE_LINK_H && \ (defined(__linux__) || defined(__FreeBSD__) || \ defined(__FreeBSD_kernel__) || defined(__NetBSD__)) struct DlIteratePhdrData { @@ -509,16 +510,50 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth, dl_iterate_phdr(dl_iterate_phdr_cb, &data); return true; } +#elif ENABLE_BACKTRACES && defined(__APPLE__) && defined(__LP64__) +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + uint32_t NumImgs = _dyld_image_count(); + for (uint32_t ImageIndex = 0; ImageIndex < NumImgs; ImageIndex++) { + const char *Name = _dyld_get_image_name(ImageIndex); + intptr_t Slide = _dyld_get_image_vmaddr_slide(ImageIndex); + auto *Header = + (const struct mach_header_64 *)_dyld_get_image_header(ImageIndex); + if (Header == NULL) + continue; + auto Cmd = (const struct load_command *)(&Header[1]); + for (uint32_t CmdNum = 0; CmdNum < Header->ncmds; ++CmdNum) { + uint32_t BaseCmd = Cmd->cmd & ~LC_REQ_DYLD; + if (BaseCmd == LC_SEGMENT_64) { + auto 
CmdSeg64 = (const struct segment_command_64 *)Cmd; + for (int j = 0; j < Depth; j++) { + if (Modules[j]) + continue; + intptr_t Addr = (intptr_t)StackTrace[j]; + if ((intptr_t)CmdSeg64->vmaddr + Slide <= Addr && + Addr < intptr_t(CmdSeg64->vmaddr + CmdSeg64->vmsize + Slide)) { + Modules[j] = Name; + Offsets[j] = Addr - Slide; + } + } + } + Cmd = (const load_command *)(((const char *)Cmd) + (Cmd->cmdsize)); + } + } + return true; +} #else -/// This platform does not have dl_iterate_phdr, so we do not yet know how to -/// find all loaded DSOs. +/// Backtraces are not enabled or we don't yet know how to find all loaded DSOs +/// on this platform. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, const char *MainExecutableName, StringSaver &StrPool) { return false; } -#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ... +#endif // ENABLE_BACKTRACES && ... (findModulesAndOffsets variants) #if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE) static int unwindBacktrace(void **StackTrace, int MaxEntries) { @@ -617,13 +652,12 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) { if (dlinfo.dli_sname != nullptr) { OS << ' '; - int res; - char *d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res); - if (!d) - OS << dlinfo.dli_sname; - else + if (char *d = itaniumDemangle(dlinfo.dli_sname)) { OS << d; - free(d); + free(d); + } else { + OS << dlinfo.dli_sname; + } OS << format(" + %tu", (static_cast<const char *>(StackTrace[i]) - static_cast<const char *>(dlinfo.dli_saddr))); diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index a167e0a76795..d381d79fba96 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -43,6 +43,7 @@ #include <cstdint> #include <iterator> #include <limits> +#include <map> #include <memory> #include <optional> #include <string> @@ -257,12 +258,12 @@ public: explicit 
RealFileSystem(bool LinkCWDToProcess) { if (!LinkCWDToProcess) { SmallString<128> PWD, RealPWD; - if (llvm::sys::fs::current_path(PWD)) - return; // Awful, but nothing to do here. - if (llvm::sys::fs::real_path(PWD, RealPWD)) - WD = {PWD, PWD}; + if (std::error_code EC = llvm::sys::fs::current_path(PWD)) + WD = EC; + else if (llvm::sys::fs::real_path(PWD, RealPWD)) + WD = WorkingDirectory{PWD, PWD}; else - WD = {PWD, RealPWD}; + WD = WorkingDirectory{PWD, RealPWD}; } } @@ -284,10 +285,10 @@ private: // If this FS has its own working dir, use it to make Path absolute. // The returned twine is safe to use as long as both Storage and Path live. Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const { - if (!WD) + if (!WD || !*WD) return Path; Path.toVector(Storage); - sys::fs::make_absolute(WD->Resolved, Storage); + sys::fs::make_absolute(WD->get().Resolved, Storage); return Storage; } @@ -297,7 +298,7 @@ private: // The current working directory, with links resolved. (readlink .). 
SmallString<128> Resolved; }; - std::optional<WorkingDirectory> WD; + std::optional<llvm::ErrorOr<WorkingDirectory>> WD; }; } // namespace @@ -323,8 +324,10 @@ RealFileSystem::openFileForRead(const Twine &Name) { } llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const { + if (WD && *WD) + return std::string(WD->get().Specified.str()); if (WD) - return std::string(WD->Specified.str()); + return WD->getError(); SmallString<128> Dir; if (std::error_code EC = llvm::sys::fs::current_path(Dir)) @@ -345,7 +348,7 @@ std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) { return std::make_error_code(std::errc::not_a_directory); if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved)) return Err; - WD = {Absolute, Resolved}; + WD = WorkingDirectory{Absolute, Resolved}; return std::error_code(); } @@ -723,7 +726,7 @@ public: class InMemoryDirectory : public InMemoryNode { Status Stat; - llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries; + std::map<std::string, std::unique_ptr<InMemoryNode>> Entries; public: InMemoryDirectory(Status Stat) @@ -739,15 +742,14 @@ public: UniqueID getUniqueID() const { return Stat.getUniqueID(); } InMemoryNode *getChild(StringRef Name) const { - auto I = Entries.find(Name); + auto I = Entries.find(Name.str()); if (I != Entries.end()) return I->second.get(); return nullptr; } InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) { - return Entries.insert(make_pair(Name, std::move(Child))) - .first->second.get(); + return Entries.emplace(Name, std::move(Child)).first->second.get(); } using const_iterator = decltype(Entries)::const_iterator; @@ -2237,6 +2239,14 @@ RedirectingFileSystem::LookupResult::LookupResult( } } +void RedirectingFileSystem::LookupResult::getPath( + llvm::SmallVectorImpl<char> &Result) const { + Result.clear(); + for (Entry *Parent : Parents) + llvm::sys::path::append(Result, Parent->getName()); + llvm::sys::path::append(Result, E->getName()); +} + 
std::error_code RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const { if (std::error_code EC = makeAbsolute(Path)) @@ -2255,11 +2265,14 @@ ErrorOr<RedirectingFileSystem::LookupResult> RedirectingFileSystem::lookupPath(StringRef Path) const { sys::path::const_iterator Start = sys::path::begin(Path); sys::path::const_iterator End = sys::path::end(Path); + llvm::SmallVector<Entry *, 32> Entries; for (const auto &Root : Roots) { ErrorOr<RedirectingFileSystem::LookupResult> Result = - lookupPathImpl(Start, End, Root.get()); - if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) + lookupPathImpl(Start, End, Root.get(), Entries); + if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) { + Result->Parents = std::move(Entries); return Result; + } } return make_error_code(llvm::errc::no_such_file_or_directory); } @@ -2267,7 +2280,8 @@ RedirectingFileSystem::lookupPath(StringRef Path) const { ErrorOr<RedirectingFileSystem::LookupResult> RedirectingFileSystem::lookupPathImpl( sys::path::const_iterator Start, sys::path::const_iterator End, - RedirectingFileSystem::Entry *From) const { + RedirectingFileSystem::Entry *From, + llvm::SmallVectorImpl<Entry *> &Entries) const { assert(!isTraversalComponent(*Start) && !isTraversalComponent(From->getName()) && "Paths should not contain traversal components"); @@ -2296,10 +2310,12 @@ RedirectingFileSystem::lookupPathImpl( auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(From); for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry : llvm::make_range(DE->contents_begin(), DE->contents_end())) { + Entries.push_back(From); ErrorOr<RedirectingFileSystem::LookupResult> Result = - lookupPathImpl(Start, End, DirEntry.get()); + lookupPathImpl(Start, End, DirEntry.get(), Entries); if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) return Result; + Entries.pop_back(); } return make_error_code(llvm::errc::no_such_file_or_directory); @@ -2541,10 
+2557,12 @@ RedirectingFileSystem::getRealPath(const Twine &OriginalPath, return P; } - // If we found a DirectoryEntry, still fallthrough to the original path if - // allowed, because directories don't have a single external contents path. - if (Redirection == RedirectKind::Fallthrough) - return ExternalFS->getRealPath(CanonicalPath, Output); + // We found a DirectoryEntry, which does not have a single external contents + // path. Use the canonical virtual path. + if (Redirection == RedirectKind::Fallthrough) { + Result->getPath(Output); + return {}; + } return llvm::errc::invalid_argument; } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 92cf4fcda5a6..b949b724509f 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -130,7 +130,7 @@ namespace fs { const file_t kInvalidFile = INVALID_HANDLE_VALUE; -std::string getMainExecutableImpl(const char *argv0, void *MainExecAddr) { +std::string getMainExecutable(const char *argv0, void *MainExecAddr) { SmallVector<wchar_t, MAX_PATH> PathName; PathName.resize_for_overwrite(PathName.capacity()); DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.size()); @@ -650,8 +650,6 @@ bool equivalent(file_status A, file_status B) { return A.FileIndexHigh == B.FileIndexHigh && A.FileIndexLow == B.FileIndexLow && A.FileSizeHigh == B.FileSizeHigh && A.FileSizeLow == B.FileSizeLow && - A.LastAccessedTimeHigh == B.LastAccessedTimeHigh && - A.LastAccessedTimeLow == B.LastAccessedTimeLow && A.LastWriteTimeHigh == B.LastWriteTimeHigh && A.LastWriteTimeLow == B.LastWriteTimeLow && A.VolumeSerialNumber == B.VolumeSerialNumber; diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index ba93afe0803b..cb82f55fc38b 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// #include 
"llvm/Support/ConvertUTF.h" +#include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" @@ -204,6 +205,9 @@ static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; +/// The function to call on "SIGPIPE" (one-time use only). +static std::atomic<void (*)()> OneShotPipeSignalFunction(nullptr); + // Windows creates a new thread to execute the console handler when an event // (such as CTRL/C) occurs. This causes concurrency issues with the above // globals which this critical section addresses. @@ -575,11 +579,16 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { } void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) { - // Unimplemented. + OneShotPipeSignalFunction.exchange(Handler); } void llvm::sys::DefaultOneShotPipeSignalHandler() { - // Unimplemented. + llvm::sys::Process::Exit(EX_IOERR, /*NoCleanup=*/true); +} + +void llvm::sys::CallOneShotPipeSignalHandler() { + if (auto OldOneShotPipeFunction = OneShotPipeSignalFunction.exchange(nullptr)) + OldOneShotPipeFunction(); } /// Add a function to be called when a signal is delivered to the process. The @@ -816,7 +825,15 @@ WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) { } void sys::CleanupOnSignal(uintptr_t Context) { - LLVMUnhandledExceptionFilter((LPEXCEPTION_POINTERS)Context); + LPEXCEPTION_POINTERS EP = (LPEXCEPTION_POINTERS)Context; + // Broken pipe is not a crash. + // + // 0xE0000000 is combined with the return code in the exception raised in + // CrashRecoveryContext::HandleExit(). 
+ unsigned RetCode = EP->ExceptionRecord->ExceptionCode; + if (RetCode == (0xE0000000 | EX_IOERR)) + return; + LLVMUnhandledExceptionFilter(EP); } static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc index aa47484cb5ce..4baf8b8cb82a 100644 --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -233,7 +233,7 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() { unsigned CurrentGroupID = (*ActiveGroups)[0]; ProcessorGroup NewG{Groups[CurrentGroupID]}; NewG.Affinity = ProcessAffinityMask; - NewG.UsableThreads = countPopulation(ProcessAffinityMask); + NewG.UsableThreads = llvm::popcount(ProcessAffinityMask); Groups.clear(); Groups.push_back(NewG); } diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index b85b1eb83ef8..6ac2c6aeeb46 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -2041,8 +2041,11 @@ StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { } return UnquotedValue; } - // Plain or block. - return Value.rtrim(' '); + // Plain. + // Trim whitespace ('b-char' and 's-white'). + // NOTE: Alternatively we could change the scanner to not include whitespace + // here in the first place. 
+ return Value.rtrim("\x0A\x0D\x20\x09"); } StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index 4eb0b3afd563..f21b7a0ca699 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -397,17 +397,23 @@ void Input::reportWarning(const SMRange &range, const Twine &message) { std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { SmallString<128> StringStorage; - if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) { + switch (N->getType()) { + case Node::NK_Scalar: { + ScalarNode *SN = dyn_cast<ScalarNode>(N); StringRef KeyStr = SN->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } return std::make_unique<ScalarHNode>(N, KeyStr); - } else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) { + } + case Node::NK_BlockScalar: { + BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N); StringRef ValueCopy = BSN->getValue().copy(StringAllocator); return std::make_unique<ScalarHNode>(N, ValueCopy); - } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) { + } + case Node::NK_Sequence: { + SequenceNode *SQ = dyn_cast<SequenceNode>(N); auto SQHNode = std::make_unique<SequenceHNode>(N); for (Node &SN : *SQ) { auto Entry = createHNodes(&SN); @@ -416,7 +422,9 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { SQHNode->Entries.push_back(std::move(Entry)); } return std::move(SQHNode); - } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) { + } + case Node::NK_Mapping: { + MappingNode *Map = dyn_cast<MappingNode>(N); auto mapHNode = std::make_unique<MapHNode>(N); for (KeyValueNode &KVN : *Map) { Node *KeyNode = KVN.getKey(); @@ -435,6 +443,11 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } + if (mapHNode->Mapping.count(KeyStr)) + // From 
YAML spec: "The content of a mapping node is an unordered set of + // key/value node pairs, with the restriction that each of the keys is + // unique." + setError(KeyNode, Twine("duplicated mapping key '") + KeyStr + "'"); auto ValueHNode = createHNodes(Value); if (EC) break; @@ -442,9 +455,10 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { std::make_pair(std::move(ValueHNode), KeyNode->getSourceRange()); } return std::move(mapHNode); - } else if (isa<NullNode>(N)) { + } + case Node::NK_Null: return std::make_unique<EmptyHNode>(N); - } else { + default: setError(N, "unknown node kind"); return nullptr; } diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp index a49bedcfd2b0..eb671fe2596d 100644 --- a/llvm/lib/Support/Z3Solver.cpp +++ b/llvm/lib/Support/Z3Solver.cpp @@ -729,7 +729,7 @@ public: const Z3_sort Z3Sort = toZ3Sort(*getBitvectorSort(BitWidth)).Sort; // Slow path, when 64 bits are not enough. - if (LLVM_UNLIKELY(Int.getBitWidth() > 64u)) { + if (LLVM_UNLIKELY(!Int.isRepresentableByInt64())) { SmallString<40> Buffer; Int.toString(Buffer, 10); return newExprRef(Z3Expr( diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 92b15f14c62f..a4fc605019c2 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -56,6 +56,7 @@ #ifdef _WIN32 #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/Windows/WindowsSupport.h" #endif @@ -83,8 +84,15 @@ raw_ostream::~raw_ostream() { } size_t raw_ostream::preferred_buffer_size() const { +#ifdef _WIN32 + // On Windows BUFSIZ is only 512 which results in more calls to write. This + // overhead can cause significant performance degradation. Therefore use a + // better default. + return (16 * 1024); +#else // BUFSIZ is intended to be a reasonable default. 
return BUFSIZ; +#endif } void raw_ostream::SetBuffered() { @@ -775,6 +783,15 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { ) continue; +#ifdef _WIN32 + // Windows equivalents of SIGPIPE/EPIPE. + DWORD WinLastError = GetLastError(); + if (WinLastError == ERROR_BROKEN_PIPE || + (WinLastError == ERROR_NO_DATA && errno == EINVAL)) { + llvm::sys::CallOneShotPipeSignalHandler(); + errno = EPIPE; + } +#endif // Otherwise it's a non-recoverable error. Note it and quit. error_detected(std::error_code(errno, std::generic_category())); break; @@ -802,8 +819,6 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); #ifdef _WIN32 pos = ::_lseeki64(FD, off, SEEK_SET); -#elif defined(HAVE_LSEEK64) - pos = ::lseek64(FD, off, SEEK_SET); #else pos = ::lseek(FD, off, SEEK_SET); #endif @@ -992,7 +1007,7 @@ Error llvm::writeToOutput(StringRef OutputFileName, return Write(Out); } - unsigned Mode = sys::fs::all_read | sys::fs::all_write | sys::fs::all_exe; + unsigned Mode = sys::fs::all_read | sys::fs::all_write; Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create(OutputFileName + ".temp-stream-%%%%%%", Mode); if (!Temp) diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c index 9d484195a6d6..4e9082cec456 100644 --- a/llvm/lib/Support/regcomp.c +++ b/llvm/lib/Support/regcomp.c @@ -329,7 +329,15 @@ llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) /* set things up */ p->g = g; + /* suppress warning from the following explicit cast. 
*/ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif /* __GNUC__ */ p->next = (char *)pattern; /* convenience; we do not modify it */ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif /* __GNUC__ */ p->end = p->next + len; p->error = 0; p->ncsalloc = 0; diff --git a/llvm/lib/Support/regex_impl.h b/llvm/lib/Support/regex_impl.h index 8ddac7dcf998..8f0c532205ed 100644 --- a/llvm/lib/Support/regex_impl.h +++ b/llvm/lib/Support/regex_impl.h @@ -35,8 +35,8 @@ * @(#)regex.h 8.1 (Berkeley) 6/2/93 */ -#ifndef _REGEX_H_ -#define _REGEX_H_ +#ifndef LLVM_SUPPORT_REGEX_IMPL_H +#define LLVM_SUPPORT_REGEX_IMPL_H #include <sys/types.h> typedef off_t llvm_regoff_t; @@ -105,4 +105,4 @@ size_t llvm_strlcpy(char *dst, const char *src, size_t siz); } #endif -#endif /* !_REGEX_H_ */ +#endif /* LLVM_SUPPORT_REGEX_IMPL_H */ diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp index 9a3f5faa336b..577f14189caf 100644 --- a/llvm/lib/Support/xxhash.cpp +++ b/llvm/lib/Support/xxhash.cpp @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet +* Copyright (C) 2012-2021, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * @@ -32,10 +32,14 @@ * - xxHash source repository : https://github.com/Cyan4973/xxHash */ -/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed - * everything but a simple interface for computing XXh64. */ +// xxhash64 is based on commit d2df04efcbef7d7f6886d345861e5dfda4edacc1. Removed +// everything but a simple interface for computing xxh64. + +// xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July +// 2023). 
#include "llvm/Support/xxhash.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include <stdlib.h> @@ -47,6 +51,10 @@ static uint64_t rotl64(uint64_t X, size_t R) { return (X << R) | (X >> (64 - R)); } +constexpr uint32_t PRIME32_1 = 0x9E3779B1; +constexpr uint32_t PRIME32_2 = 0x85EBCA77; +constexpr uint32_t PRIME32_3 = 0xC2B2AE3D; + static const uint64_t PRIME64_1 = 11400714785074694791ULL; static const uint64_t PRIME64_2 = 14029467366897019727ULL; static const uint64_t PRIME64_3 = 1609587929392839161ULL; @@ -67,6 +75,15 @@ static uint64_t mergeRound(uint64_t Acc, uint64_t Val) { return Acc; } +static uint64_t XXH64_avalanche(uint64_t hash) { + hash ^= hash >> 33; + hash *= PRIME64_2; + hash ^= hash >> 29; + hash *= PRIME64_3; + hash ^= hash >> 32; + return hash; +} + uint64_t llvm::xxHash64(StringRef Data) { size_t Len = Data.size(); uint64_t Seed = 0; @@ -104,14 +121,15 @@ uint64_t llvm::xxHash64(StringRef Data) { H64 += (uint64_t)Len; - while (P + 8 <= BEnd) { + while (reinterpret_cast<uintptr_t>(P) + 8 <= + reinterpret_cast<uintptr_t>(BEnd)) { uint64_t const K1 = round(0, endian::read64le(P)); H64 ^= K1; H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4; P += 8; } - if (P + 4 <= BEnd) { + if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) { H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1; H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3; P += 4; @@ -123,15 +141,267 @@ uint64_t llvm::xxHash64(StringRef Data) { P++; } - H64 ^= H64 >> 33; - H64 *= PRIME64_2; - H64 ^= H64 >> 29; - H64 *= PRIME64_3; - H64 ^= H64 >> 32; - - return H64; + return XXH64_avalanche(H64); } uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) { return xxHash64({(const char *)Data.data(), Data.size()}); } + +constexpr size_t XXH3_SECRETSIZE_MIN = 136; +constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192; + +/* Pseudorandom data taken directly from FARSH */ +// clang-format off +constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 
0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; +// clang-format on + +constexpr uint64_t PRIME_MX1 = 0x165667919E3779F9; +constexpr uint64_t PRIME_MX2 = 0x9FB21C651E98DF25; + +// Calculates a 64-bit to 128-bit multiply, then XOR folds it. +static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) { +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + return uint64_t(product) ^ uint64_t(product >> 64); + +#else + /* First calculate all of the cross products. */ + const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF); + const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF); + const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32); + const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32); + + /* Now add the products together. These will never overflow. 
*/ + const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + return upper ^ lower; +#endif +} + +constexpr size_t XXH_STRIPE_LEN = 64; +constexpr size_t XXH_SECRET_CONSUME_RATE = 8; +constexpr size_t XXH_ACC_NB = XXH_STRIPE_LEN / sizeof(uint64_t); + +static uint64_t XXH3_avalanche(uint64_t hash) { + hash ^= hash >> 37; + hash *= PRIME_MX1; + hash ^= hash >> 32; + return hash; +} + +static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + const uint8_t c1 = input[0]; + const uint8_t c2 = input[len >> 1]; + const uint8_t c3 = input[len - 1]; + uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | + ((uint32_t)c3 << 0) | ((uint32_t)len << 8); + uint64_t bitflip = + (uint64_t)(endian::read32le(secret) ^ endian::read32le(secret + 4)) + + seed; + return XXH64_avalanche(uint64_t(combined) ^ bitflip); +} + +static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + seed ^= (uint64_t)byteswap(uint32_t(seed)) << 32; + const uint32_t input1 = endian::read32le(input); + const uint32_t input2 = endian::read32le(input + len - 4); + uint64_t acc = + (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed; + const uint64_t input64 = (uint64_t)input2 | ((uint64_t)input1 << 32); + acc ^= input64; + // XXH3_rrmxmx(acc, len) + acc ^= rotl64(acc, 49) ^ rotl64(acc, 24); + acc *= PRIME_MX2; + acc ^= (acc >> 35) + (uint64_t)len; + acc *= PRIME_MX2; + return acc ^ (acc >> 28); +} + +static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t const seed) { + uint64_t input_lo = + (endian::read64le(secret + 24) ^ endian::read64le(secret + 32)) + seed; + uint64_t input_hi = + (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed; + input_lo ^= 
endian::read64le(input); + input_hi ^= endian::read64le(input + len - 8); + uint64_t acc = uint64_t(len) + byteswap(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t const seed) { + if (LLVM_LIKELY(len > 8)) + return XXH3_len_9to16_64b(input, len, secret, seed); + if (LLVM_LIKELY(len >= 4)) + return XXH3_len_4to8_64b(input, len, secret, seed); + if (len != 0) + return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ endian::read64le(secret + 56) ^ + endian::read64le(secret + 64)); +} + +static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret, + uint64_t seed) { + uint64_t lhs = seed; + uint64_t rhs = 0U - seed; + lhs += endian::read64le(secret); + rhs += endian::read64le(secret + 8); + lhs ^= endian::read64le(input); + rhs ^= endian::read64le(input + 8); + return XXH3_mul128_fold64(lhs, rhs); +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. 
*/ +LLVM_ATTRIBUTE_ALWAYS_INLINE +static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len, + const uint8_t *secret, + uint64_t const seed) { + uint64_t acc = len * PRIME64_1, acc_end; + acc += XXH3_mix16B(input + 0, secret + 0, seed); + acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed); + if (len > 32) { + acc += XXH3_mix16B(input + 16, secret + 32, seed); + acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed); + if (len > 64) { + acc += XXH3_mix16B(input + 32, secret + 64, seed); + acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed); + if (len > 96) { + acc += XXH3_mix16B(input + 48, secret + 96, seed); + acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed); + } + } + } + return XXH3_avalanche(acc + acc_end); +} + +constexpr size_t XXH3_MIDSIZE_MAX = 240; + +LLVM_ATTRIBUTE_NOINLINE +static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + constexpr size_t XXH3_MIDSIZE_STARTOFFSET = 3; + constexpr size_t XXH3_MIDSIZE_LASTOFFSET = 17; + uint64_t acc = (uint64_t)len * PRIME64_1; + const unsigned nbRounds = len / 16; + for (unsigned i = 0; i < 8; ++i) + acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed); + acc = XXH3_avalanche(acc); + + for (unsigned i = 8; i < nbRounds; ++i) { + acc += XXH3_mix16B(input + 16 * i, + secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += + XXH3_mix16B(input + len - 16, + secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + return XXH3_avalanche(acc); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input, + const uint8_t *secret) { + for (size_t i = 0; i < XXH_ACC_NB; ++i) { + uint64_t data_val = endian::read64le(input + 8 * i); + uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i); + acc[i ^ 1] += data_val; + acc[i] += uint32_t(data_key) * (data_key >> 32); + } +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static void 
XXH3_accumulate_scalar(uint64_t *acc, const uint8_t *input, + const uint8_t *secret, size_t nbStripes) { + for (size_t n = 0; n < nbStripes; ++n) + XXH3_accumulate_512_scalar(acc, input + n * XXH_STRIPE_LEN, + secret + n * XXH_SECRET_CONSUME_RATE); +} + +static void XXH3_scrambleAcc(uint64_t *acc, const uint8_t *secret) { + for (size_t i = 0; i < XXH_ACC_NB; ++i) { + acc[i] ^= acc[i] >> 47; + acc[i] ^= endian::read64le(secret + 8 * i); + acc[i] *= PRIME32_1; + } +} + +static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret) { + return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret), + acc[1] ^ endian::read64le(secret + 8)); +} + +static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key, + uint64_t start) { + uint64_t result64 = start; + for (size_t i = 0; i < 4; ++i) + result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i); + return XXH3_avalanche(result64); +} + +LLVM_ATTRIBUTE_NOINLINE +static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len, + const uint8_t *secret, size_t secretSize) { + const size_t nbStripesPerBlock = + (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + const size_t nb_blocks = (len - 1) / block_len; + alignas(16) uint64_t acc[XXH_ACC_NB] = { + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, + }; + for (size_t n = 0; n < nb_blocks; ++n) { + XXH3_accumulate_scalar(acc, input + n * block_len, secret, + nbStripesPerBlock); + XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE); + XXH3_accumulate_scalar(acc, input + nb_blocks * block_len, secret, nbStripes); + + /* last stripe */ + constexpr size_t XXH_SECRET_LASTACC_START = 7; + XXH3_accumulate_512_scalar(acc, input + len - XXH_STRIPE_LEN, + secret + secretSize 
- XXH_STRIPE_LEN - + XXH_SECRET_LASTACC_START); + + /* converge into final hash */ + constexpr size_t XXH_SECRET_MERGEACCS_START = 11; + return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, + (uint64_t)len * PRIME64_1); +} + +uint64_t llvm::xxh3_64bits(ArrayRef<uint8_t> data) { + auto *in = data.data(); + size_t len = data.size(); + if (len <= 16) + return XXH3_len_0to16_64b(in, len, kSecret, 0); + if (len <= 128) + return XXH3_len_17to128_64b(in, len, kSecret, 0); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b(in, len, kSecret, 0); + return XXH3_hashLong_64b(in, len, kSecret, sizeof(kSecret)); +} |