diff options
Diffstat (limited to 'llvm/lib/Support')
70 files changed, 3132 insertions, 1999 deletions
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index eae4fdb6c3d0..4a73739b5282 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -14,8 +14,10 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi namespace llvm { - // How the nonfinite values Inf and NaN are represented. - enum class fltNonfiniteBehavior { - // Represents standard IEEE 754 behavior. A value is nonfinite if the - // exponent field is all 1s. In such cases, a value is Inf if the - // significand bits are all zero, and NaN otherwise - IEEE754, - - // Only the Float8E5M2 has this behavior. There is no Inf representation. A - // value is NaN if the exponent field and the mantissa field are all 1s. - // This behavior matches the FP8 E4M3 type described in - // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs - // as non-signalling, although the paper does not state whether the NaN - // values are signalling or not. - NanOnly, - }; +// How the nonfinite values Inf and NaN are represented. +enum class fltNonfiniteBehavior { + // Represents standard IEEE 754 behavior. A value is nonfinite if the + // exponent field is all 1s. In such cases, a value is Inf if the + // significand bits are all zero, and NaN otherwise + IEEE754, + + // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN, + // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no + // representation for Inf, and operations that would ordinarily produce Inf + // produce NaN instead. 
+ // The details of the NaN representation(s) in this form are determined by the + // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available + // encodings do not distinguish between signalling and quiet NaN. + NanOnly, +}; - /* Represents floating point arithmetic semantics. */ - struct fltSemantics { - /* The largest E such that 2^E is representable; this matches the - definition of IEEE 754. */ - APFloatBase::ExponentType maxExponent; +// How NaN values are represented. This is curently only used in combination +// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE +// while having IEEE non-finite behavior is liable to lead to unexpected +// results. +enum class fltNanEncoding { + // Represents the standard IEEE behavior where a value is NaN if its + // exponent is all 1s and the significand is non-zero. + IEEE, + + // Represents the behavior in the Float8E4M3 floating point type where NaN is + // represented by having the exponent and mantissa set to all 1s. + // This behavior matches the FP8 E4M3 type described in + // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs + // as non-signalling, although the paper does not state whether the NaN + // values are signalling or not. + AllOnes, + + // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types + // where NaN is represented by a sign bit of 1 and all 0s in the exponent + // and mantissa (i.e. the negative zero encoding in a IEEE float). Since + // there is only one NaN value, it is treated as quiet NaN. This matches the + // behavior described in https://arxiv.org/abs/2206.02915 . + NegativeZero, +}; - /* The smallest E such that 2^E is a normalized number; this - matches the definition of IEEE 754. */ - APFloatBase::ExponentType minExponent; +/* Represents floating point arithmetic semantics. */ +struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. 
*/ + APFloatBase::ExponentType maxExponent; - /* Number of bits in the significand. This includes the integer - bit. */ - unsigned int precision; + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + APFloatBase::ExponentType minExponent; - /* Number of bits actually used in the semantics. */ - unsigned int sizeInBits; + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; - fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; + /* Number of bits actually used in the semantics. */ + unsigned int sizeInBits; - // Returns true if any number described by this semantics can be precisely - // represented by the specified semantics. Does not take into account - // the value of fltNonfiniteBehavior. - bool isRepresentableBy(const fltSemantics &S) const { - return maxExponent <= S.maxExponent && minExponent >= S.minExponent && - precision <= S.precision; - } - }; + fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754; - static const fltSemantics semIEEEhalf = {15, -14, 11, 16}; - static const fltSemantics semBFloat = {127, -126, 8, 16}; - static const fltSemantics semIEEEsingle = {127, -126, 24, 32}; - static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; - static const fltSemantics semIEEEquad = {16383, -16382, 113, 128}; - static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; - static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8, - fltNonfiniteBehavior::NanOnly}; - static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; - static const fltSemantics semBogus = {0, 0, 0, 0}; - - /* The IBM double-double semantics. Such a number consists of a pair of IEEE - 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, - (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. - Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent - to each other, and two 11-bit exponents. 
- - Note: we need to make the value different from semBogus as otherwise - an unsafe optimization may collapse both values to a single address, - and we heavily rely on them having distinct addresses. */ - static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; - - /* These are legacy semantics for the fallback, inaccrurate implementation of - IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the - operation. It's equivalent to having an IEEE number with consecutive 106 - bits of mantissa and 11 bits of exponent. - - It's not equivalent to IBM double-double. For example, a legit IBM - double-double, 1 + epsilon: - - 1 + epsilon = 1 + (1 >> 1076) - - is not representable by a consecutive 106 bits of mantissa. - - Currently, these semantics are used in the following way: - - semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> - (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> - semPPCDoubleDoubleLegacy -> IEEE operations - - We use bitcastToAPInt() to get the bit representation (in APInt) of the - underlying IEEEdouble, then use the APInt constructor to construct the - legacy IEEE float. - - TODO: Implement all operations in semPPCDoubleDouble, and delete these - semantics. 
*/ - static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, - 53 + 53, 128}; - - const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { - switch (S) { - case S_IEEEhalf: - return IEEEhalf(); - case S_BFloat: - return BFloat(); - case S_IEEEsingle: - return IEEEsingle(); - case S_IEEEdouble: - return IEEEdouble(); - case S_IEEEquad: - return IEEEquad(); - case S_PPCDoubleDouble: - return PPCDoubleDouble(); - case S_Float8E5M2: - return Float8E5M2(); - case S_Float8E4M3FN: - return Float8E4M3FN(); - case S_x87DoubleExtended: - return x87DoubleExtended(); - } - llvm_unreachable("Unrecognised floating semantics"); - } - - APFloatBase::Semantics - APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { - if (&Sem == &llvm::APFloat::IEEEhalf()) - return S_IEEEhalf; - else if (&Sem == &llvm::APFloat::BFloat()) - return S_BFloat; - else if (&Sem == &llvm::APFloat::IEEEsingle()) - return S_IEEEsingle; - else if (&Sem == &llvm::APFloat::IEEEdouble()) - return S_IEEEdouble; - else if (&Sem == &llvm::APFloat::IEEEquad()) - return S_IEEEquad; - else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) - return S_PPCDoubleDouble; - else if (&Sem == &llvm::APFloat::Float8E5M2()) - return S_Float8E5M2; - else if (&Sem == &llvm::APFloat::Float8E4M3FN()) - return S_Float8E4M3FN; - else if (&Sem == &llvm::APFloat::x87DoubleExtended()) - return S_x87DoubleExtended; - else - llvm_unreachable("Unknown floating semantics"); + fltNanEncoding nanEncoding = fltNanEncoding::IEEE; + // Returns true if any number described by this semantics can be precisely + // represented by the specified semantics. Does not take into account + // the value of fltNonfiniteBehavior. 
+ bool isRepresentableBy(const fltSemantics &S) const { + return maxExponent <= S.maxExponent && minExponent >= S.minExponent && + precision <= S.precision; } +}; - const fltSemantics &APFloatBase::IEEEhalf() { - return semIEEEhalf; - } - const fltSemantics &APFloatBase::BFloat() { - return semBFloat; - } - const fltSemantics &APFloatBase::IEEEsingle() { - return semIEEEsingle; - } - const fltSemantics &APFloatBase::IEEEdouble() { - return semIEEEdouble; - } - const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } - const fltSemantics &APFloatBase::PPCDoubleDouble() { - return semPPCDoubleDouble; - } - const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } - const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } - const fltSemantics &APFloatBase::x87DoubleExtended() { - return semX87DoubleExtended; - } - const fltSemantics &APFloatBase::Bogus() { return semBogus; } +static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16}; +static constexpr fltSemantics semBFloat = {127, -126, 8, 16}; +static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32}; +static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64}; +static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128}; +static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8}; +static constexpr fltSemantics semFloat8E5M2FNUZ = { + 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3FN = { + 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes}; +static constexpr fltSemantics semFloat8E4M3FNUZ = { + 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloat8E4M3B11FNUZ = { + 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; +static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; +static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; 
+static constexpr fltSemantics semBogus = {0, 0, 0, 0}; + +/* The IBM double-double semantics. Such a number consists of a pair of IEEE + 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal, + (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo. + Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent + to each other, and two 11-bit exponents. + + Note: we need to make the value different from semBogus as otherwise + an unsafe optimization may collapse both values to a single address, + and we heavily rely on them having distinct addresses. */ +static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128}; + +/* These are legacy semantics for the fallback, inaccrurate implementation of + IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the + operation. It's equivalent to having an IEEE number with consecutive 106 + bits of mantissa and 11 bits of exponent. + + It's not equivalent to IBM double-double. For example, a legit IBM + double-double, 1 + epsilon: + + 1 + epsilon = 1 + (1 >> 1076) + + is not representable by a consecutive 106 bits of mantissa. + + Currently, these semantics are used in the following way: + + semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) -> + (64-bit APInt, 64-bit APInt) -> (128-bit APInt) -> + semPPCDoubleDoubleLegacy -> IEEE operations + + We use bitcastToAPInt() to get the bit representation (in APInt) of the + underlying IEEEdouble, then use the APInt constructor to construct the + legacy IEEE float. + + TODO: Implement all operations in semPPCDoubleDouble, and delete these + semantics. 
*/ +static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53, + 53 + 53, 128}; + +const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { + switch (S) { + case S_IEEEhalf: + return IEEEhalf(); + case S_BFloat: + return BFloat(); + case S_IEEEsingle: + return IEEEsingle(); + case S_IEEEdouble: + return IEEEdouble(); + case S_IEEEquad: + return IEEEquad(); + case S_PPCDoubleDouble: + return PPCDoubleDouble(); + case S_Float8E5M2: + return Float8E5M2(); + case S_Float8E5M2FNUZ: + return Float8E5M2FNUZ(); + case S_Float8E4M3FN: + return Float8E4M3FN(); + case S_Float8E4M3FNUZ: + return Float8E4M3FNUZ(); + case S_Float8E4M3B11FNUZ: + return Float8E4M3B11FNUZ(); + case S_FloatTF32: + return FloatTF32(); + case S_x87DoubleExtended: + return x87DoubleExtended(); + } + llvm_unreachable("Unrecognised floating semantics"); +} + +APFloatBase::Semantics +APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { + if (&Sem == &llvm::APFloat::IEEEhalf()) + return S_IEEEhalf; + else if (&Sem == &llvm::APFloat::BFloat()) + return S_BFloat; + else if (&Sem == &llvm::APFloat::IEEEsingle()) + return S_IEEEsingle; + else if (&Sem == &llvm::APFloat::IEEEdouble()) + return S_IEEEdouble; + else if (&Sem == &llvm::APFloat::IEEEquad()) + return S_IEEEquad; + else if (&Sem == &llvm::APFloat::PPCDoubleDouble()) + return S_PPCDoubleDouble; + else if (&Sem == &llvm::APFloat::Float8E5M2()) + return S_Float8E5M2; + else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ()) + return S_Float8E5M2FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3FN()) + return S_Float8E4M3FN; + else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ()) + return S_Float8E4M3FNUZ; + else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ()) + return S_Float8E4M3B11FNUZ; + else if (&Sem == &llvm::APFloat::FloatTF32()) + return S_FloatTF32; + else if (&Sem == &llvm::APFloat::x87DoubleExtended()) + return S_x87DoubleExtended; + else + llvm_unreachable("Unknown floating semantics"); +} + +const 
fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; } +const fltSemantics &APFloatBase::BFloat() { return semBFloat; } +const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; } +const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; } +const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; } +const fltSemantics &APFloatBase::PPCDoubleDouble() { + return semPPCDoubleDouble; +} +const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; } +const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; } +const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; } +const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { + return semFloat8E4M3B11FNUZ; +} +const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } +const fltSemantics &APFloatBase::x87DoubleExtended() { + return semX87DoubleExtended; +} +const fltSemantics &APFloatBase::Bogus() { return semBogus; } + +constexpr RoundingMode APFloatBase::rmNearestTiesToEven; +constexpr RoundingMode APFloatBase::rmTowardPositive; +constexpr RoundingMode APFloatBase::rmTowardNegative; +constexpr RoundingMode APFloatBase::rmTowardZero; +constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + +/* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. 
*/ +const unsigned int maxExponent = 16383; +const unsigned int maxPrecision = 113; +const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; +const unsigned int maxPowerOfFiveParts = + 2 + + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); + +unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { + return semantics.precision; +} +APFloatBase::ExponentType +APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { + return semantics.maxExponent; +} +APFloatBase::ExponentType +APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { + return semantics.minExponent; +} +unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { + return semantics.sizeInBits; +} +unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics, + bool isSigned) { + // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need + // at least one more bit than the MaxExponent to hold the max FP value. + unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1; + // Extra sign bit needed. + if (isSigned) + ++MinBitWidth; + return MinBitWidth; +} + +bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src, + const fltSemantics &Dst) { + // Exponent range must be larger. + if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent) + return false; - constexpr RoundingMode APFloatBase::rmNearestTiesToEven; - constexpr RoundingMode APFloatBase::rmTowardPositive; - constexpr RoundingMode APFloatBase::rmTowardNegative; - constexpr RoundingMode APFloatBase::rmTowardZero; - constexpr RoundingMode APFloatBase::rmNearestTiesToAway; + // If the mantissa is long enough, the result value could still be denormal + // with a larger exponent range. + // + // FIXME: This condition is probably not accurate but also shouldn't be a + // practical concern with existing types. 
+ return Dst.precision >= Src.precision; +} - /* A tight upper bound on number of parts required to hold the value - pow(5, power) is +unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} - power * 815 / (351 * integerPartWidth) + 1 +static constexpr APFloatBase::ExponentType +exponentZero(const fltSemantics &semantics) { + return semantics.minExponent - 1; +} - However, whilst the result may require only this many parts, - because we are multiplying two values to get it, the - multiplication may require an extra part with the excess part - being zero (consider the trivial case of 1 * 1, tcFullMultiply - requires two parts to hold the single-part result). So we add an - extra one to guarantee enough space whilst multiplying. */ - const unsigned int maxExponent = 16383; - const unsigned int maxPrecision = 113; - const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; - const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth)); +static constexpr APFloatBase::ExponentType +exponentInf(const fltSemantics &semantics) { + return semantics.maxExponent + 1; +} - unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) { - return semantics.precision; - } - APFloatBase::ExponentType - APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) { +static constexpr APFloatBase::ExponentType +exponentNaN(const fltSemantics &semantics) { + if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics.nanEncoding == fltNanEncoding::NegativeZero) + return exponentZero(semantics); return semantics.maxExponent; } - APFloatBase::ExponentType - APFloatBase::semanticsMinExponent(const fltSemantics &semantics) { - return semantics.minExponent; - } - unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) { - return semantics.sizeInBits; - } - - unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) 
{ - return Sem.sizeInBits; + return semantics.maxExponent + 1; } /* A bunch of private, handy routines. */ @@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) { return make_error<StringError>(Err, inconvertibleErrorCode()); } -static inline unsigned int -partCountForBits(unsigned int bits) -{ +static constexpr inline unsigned int partCountForBits(unsigned int bits) { return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth; } @@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if (hexDigit == -1U) + if (hexDigit == UINT_MAX) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts, lsb = APInt::tcLSB(parts, partCount); - /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */ if (bits <= lsb) return lfExactlyZero; if (bits == lsb + 1) @@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { APInt fill_storage; if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { - // The only NaN representation is where the mantissa is all 1s, which is - // non-signalling. + // Finite-only types do not distinguish signalling and quiet NaN, so + // make them all signalling. SNaN = false; - fill_storage = APInt::getAllOnes(semantics->precision - 1); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + sign = true; + fill_storage = APInt::getZero(semantics->precision - 1); + } else { + fill_storage = APInt::getAllOnes(semantics->precision - 1); + } fill = &fill_storage; } @@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { // conventionally, this is the next bit down from the QNaN bit. 
if (APInt::tcIsZero(significand, numParts)) APInt::tcSetBit(significand, QNaNBit - 1); + } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // The only NaN is a quiet NaN, and it has no bits sets in the significand. + // Do nothing. } else { // We always have to set the QNaN bit to make it a QNaN. APInt::tcSetBit(significand, QNaNBit); @@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const { } bool IEEEFloat::isLargest() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) { // The largest number by magnitude in our format will be the floating point // number with maximum exponent and with significand that is all ones except // the LSB. @@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { exponent = semantics->maxExponent; tcSetLeastSignificantBits(significandParts(), partCount(), semantics->precision); - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) APInt::tcClearBit(significandParts(), 0); return opInexact; @@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, } } + // The all-ones values is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); @@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, underflow for exact results. */ if (lost_fraction == lfExactlyZero) { /* Canonicalize zeroes. 
*/ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return opOK; } @@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, /* Renormalize by incrementing the exponent and shifting our significand right one. However if we already have the maximum exponent we overflow to infinity. */ - if (exponent == semantics->maxExponent) { - category = fcInfinity; - - return (opStatus) (opOverflow | opInexact); - } + if (exponent == semantics->maxExponent) + // Invoke overflow handling with a rounding mode that will guarantee + // that the result gets turned into the correct infinity representation. + // This is needed instead of just setting the category to infinity to + // account for 8-bit floating point types that have no inf, only NaN. + return handleOverflow(sign ? rmTowardNegative : rmTowardPositive); shiftSignificandRight(1); return opInexact; } + // The all-ones values is an overflow if NaN is all ones. If NaN is + // represented by negative zero, then it is a valid finite value. if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes && exponent == semantics->maxExponent && isSignificandAllOnes()) return handleOverflow(rounding_mode); } @@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode, assert(omsb < semantics->precision); /* Canonicalize zeroes. */ - if (omsb == 0) + if (omsb == 0) { category = fcZero; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } /* The fcZero case is a denormal that underflowed to zero. */ return (opStatus) (opUnderflow | opInexact); @@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) { /* Change sign. */ void IEEEFloat::changeSign() { + // With NaN-as-negative-zero, neither NaN or negative zero can change + // their signs. 
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero && + (isZero() || isNaN())) + return; /* Look mummy, this one's easy. */ sign = !sign; } @@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs, if (category == fcZero) { if (rhs.category != fcZero || (sign == rhs.sign) == subtract) sign = (rounding_mode == rmTowardNegative); + // NaN-in-negative-zero means zeros need to be normalized to +0. + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; } return fs; @@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs, sign ^= rhs.sign; fs = multiplySpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = multiplySignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs, sign ^= rhs.sign; fs = divideSpecials(rhs); + if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; if (isFiniteNonZero()) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); @@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) { } } - if (isZero()) + if (isZero()) { sign = origSign; // IEEE754 requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + // But some 8-bit floats only have positive 0. 
+ sign = false; + } + else sign ^= origSign; return fs; @@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) { fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK); } - if (isZero()) + if (isZero()) { sign = origSign; // fmod requires this + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } return fs; } @@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand, /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a positive zero unless rounding to minus infinity, except that adding two like-signed zeroes gives that zero. */ - if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) + if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) { sign = (rounding_mode == rmTowardNegative); + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; + } } else { fs = multiplySpecials(multiplicand); @@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, return is_signaling ? opInvalidOp : opOK; } + // If NaN is negative zero, we need to create a new NaN to avoid converting + // NaN to -Inf. + if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero && + semantics->nanEncoding != fltNanEncoding::NegativeZero) + makeNaN(false, false); + *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; // For x87 extended precision, we want to make a NaN, not a special NaN if @@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics, makeNaN(false, sign); *losesInfo = true; fs = opInexact; + } else if (category == fcZero && + semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Negative zero loses info, but positive zero doesn't. + *losesInfo = + fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign; + fs = *losesInfo ? 
opInexact : opOK; + // NaN is negative zero means -0 -> +0, which can lose information + sign = false; } else { *losesInfo = false; fs = opOK; @@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s, } hex_value = hexDigitValue(*p); - if (hex_value == -1U) + if (hex_value == UINT_MAX) break; p++; @@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; - /* Check whether the normalized exponent is high enough to overflow - max during the log-rebasing in the max-exponent check below. */ + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { fs = handleOverflow(rounding_mode); @@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const { return APInt(128, words); } -APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEquad); - assert(partCount()==2); +template <const fltSemantics &S> +APInt IEEEFloat::convertIEEEFloatToAPInt() const { + assert(semantics == &S); + + constexpr int bias = -(S.minExponent - 1); + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth; + constexpr integerPart integer_bit = + integerPart{1} << (trailing_significand_bits % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; - uint64_t myexponent, mysignificand, mysignificand2; + uint64_t myexponent; + 
std::array<integerPart, partCountForBits(trailing_significand_bits)> + mysignificand; if (isFiniteNonZero()) { - myexponent = exponent+16383; //bias - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = mysignificand2 = 0; - } else if (category==fcInfinity) { - myexponent = 0x7fff; - mysignificand = mysignificand2 = 0; + myexponent = exponent + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + if (myexponent == 1 && + !(significandParts()[integer_bit_part] & integer_bit)) + myexponent = 0; // denormal + } else if (category == fcZero) { + myexponent = ::exponentZero(S) + bias; + mysignificand.fill(0); + } else if (category == fcInfinity) { + if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + llvm_unreachable("semantics don't support inf!"); + } + myexponent = ::exponentInf(S) + bias; + mysignificand.fill(0); } else { assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7fff; - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - } - - uint64_t words[2]; - words[0] = mysignificand; - words[1] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7fff) << 48) | - (mysignificand2 & 0xffffffffffffLL); + myexponent = ::exponentNaN(S) + bias; + std::copy_n(significandParts(), mysignificand.size(), + mysignificand.begin()); + } + std::array<uint64_t, (S.sizeInBits + 63) / 64> words; + auto words_iter = + std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin()); + if constexpr (significand_mask != 0) { + // Clear the integer bit. 
+ words[mysignificand.size() - 1] &= significand_mask; + } + std::fill(words_iter, words.end(), uint64_t{0}); + constexpr size_t last_word = words.size() - 1; + uint64_t shifted_sign = static_cast<uint64_t>(sign & 1) + << ((S.sizeInBits - 1) % 64); + words[last_word] |= shifted_sign; + uint64_t shifted_exponent = (myexponent & exponent_mask) + << (trailing_significand_bits % 64); + words[last_word] |= shifted_exponent; + if constexpr (last_word == 0) { + return APInt(S.sizeInBits, words[0]); + } + return APInt(S.sizeInBits, words); +} - return APInt(128, words); +APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const { + assert(partCount() == 2); + return convertIEEEFloatToAPInt<semIEEEquad>(); } APInt IEEEFloat::convertDoubleAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble); assert(partCount()==1); - - uint64_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+1023; //bias - mysignificand = *significandParts(); - if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x7ff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x7ff; - mysignificand = *significandParts(); - } - - return APInt(64, ((((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL)))); + return convertIEEEFloatToAPInt<semIEEEdouble>(); } APInt IEEEFloat::convertFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+127; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x800000)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand 
= 0; - } else if (category==fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | - (mysignificand & 0x7fffff))); + return convertIEEEFloatToAPInt<semIEEEsingle>(); } APInt IEEEFloat::convertBFloatAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semBFloat); assert(partCount() == 1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 127; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x80)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xff; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xff; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) | - (mysignificand & 0x7f))); + return convertIEEEFloatToAPInt<semBFloat>(); } APInt IEEEFloat::convertHalfAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf); assert(partCount()==1); - - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent+15; //bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x400)) - myexponent = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category==fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | - (mysignificand & 0x3ff))); + return 
convertIEEEFloatToAPInt<semIEEEhalf>(); } APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2>(); +} - uint32_t myexponent, mysignificand; - - if (isFiniteNonZero()) { - myexponent = exponent + 15; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x4)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0x1f; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0x1f; - mysignificand = (uint32_t)*significandParts(); - } - - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) | - (mysignificand & 0x3))); +APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>(); } APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const { - assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN); assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FN>(); +} - uint32_t myexponent, mysignificand; +APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>(); +} - if (isFiniteNonZero()) { - myexponent = exponent + 7; // bias - mysignificand = (uint32_t)*significandParts(); - if (myexponent == 1 && !(mysignificand & 0x8)) - myexponent = 0; // denormal - } else if (category == fcZero) { - myexponent = 0; - mysignificand = 0; - } else if (category == fcInfinity) { - myexponent = 0xf; - mysignificand = 0; - } else { - assert(category == fcNaN && "Unknown category!"); - myexponent = 0xf; - mysignificand = (uint32_t)*significandParts(); - } +APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const { + assert(partCount() == 1); + 
return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>(); +} - return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) | - (mysignificand & 0x7))); +APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloatTF32>(); } // This function creates an APInt that is just a bit map of the floating @@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2) return convertFloat8E5M2APFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ) + return convertFloat8E5M2FNUZAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN) return convertFloat8E4M3FNAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ) + return convertFloat8E4M3FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ) + return convertFloat8E4M3B11FNUZAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloatTF32) + return convertFloatTF32APFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) { } } -void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { - uint64_t i1 = api.getRawData()[0]; - uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i2 >> 48) & 0x7fff; - uint64_t mysignificand = i1; - uint64_t mysignificand2 = i2 & 0xffffffffffffLL; +template <const fltSemantics &S> +void IEEEFloat::initFromIEEEAPInt(const APInt &api) { + assert(api.getBitWidth() == S.sizeInBits); + constexpr integerPart integer_bit = integerPart{1} + << ((S.precision - 1) % integerPartWidth); + constexpr uint64_t significand_mask = integer_bit - 1; + constexpr unsigned int trailing_significand_bits = S.precision - 1; + constexpr unsigned int 
stored_significand_parts = + partCountForBits(trailing_significand_bits); + constexpr unsigned int exponent_bits = + S.sizeInBits - 1 - trailing_significand_bits; + static_assert(exponent_bits < 64); + constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1; + constexpr int bias = -(S.minExponent - 1); - initialize(&semIEEEquad); - assert(partCount()==2); - - sign = static_cast<unsigned int>(i2>>63); - if (myexponent==0 && - (mysignificand==0 && mysignificand2==0)) { - makeZero(sign); - } else if (myexponent==0x7fff && - (mysignificand==0 && mysignificand2==0)) { - makeInf(sign); - } else if (myexponent==0x7fff && - (mysignificand!=0 || mysignificand2 !=0)) { - category = fcNaN; - exponent = exponentNaN(); - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - } else { - category = fcNormal; - exponent = myexponent - 16383; - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - if (myexponent==0) // denormal - exponent = -16382; - else - significandParts()[1] |= 0x1000000000000LL; // integer bit + // Copy the bits of the significand. We need to clear out the exponent and + // sign bit in the last word. + std::array<integerPart, stored_significand_parts> mysignificand; + std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin()); + if constexpr (significand_mask != 0) { + mysignificand[mysignificand.size() - 1] &= significand_mask; } -} -void IEEEFloat::initFromDoubleAPInt(const APInt &api) { - uint64_t i = *api.getRawData(); - uint64_t myexponent = (i >> 52) & 0x7ff; - uint64_t mysignificand = i & 0xfffffffffffffLL; + // We assume the last word holds the sign bit, the exponent, and potentially + // some of the trailing significand field. 
+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1]; + uint64_t myexponent = + (last_word >> (trailing_significand_bits % 64)) & exponent_mask; - initialize(&semIEEEdouble); - assert(partCount()==1); + initialize(&S); + assert(partCount() == mysignificand.size()); - sign = static_cast<unsigned int>(i>>63); - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x7ff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x7ff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 1023; - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -1022; - else - *significandParts() |= 0x10000000000000LL; // integer bit - } -} + sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64)); -void IEEEFloat::initFromFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 23) & 0xff; - uint32_t mysignificand = i & 0x7fffff; + bool all_zero_significand = + llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; }); - initialize(&semIEEEsingle); - assert(partCount()==1); + bool is_zero = myexponent == 0 && all_zero_significand; - sign = i >> 31; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0xff && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0xff && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -126; - else - *significandParts() |= 0x800000; // integer bit + if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) { + if (myexponent - bias == ::exponentInf(S) && all_zero_significand) { + makeInf(sign); + return; + } } -} -void 
IEEEFloat::initFromBFloatAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 7) & 0xff; - uint32_t mysignificand = i & 0x7f; + bool is_nan = false; - initialize(&semBFloat); - assert(partCount() == 1); + if constexpr (S.nanEncoding == fltNanEncoding::IEEE) { + is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) { + bool all_ones_significand = + std::all_of(mysignificand.begin(), mysignificand.end() - 1, + [](integerPart bits) { return bits == ~integerPart{0}; }) && + (!significand_mask || + mysignificand[mysignificand.size() - 1] == significand_mask); + is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand; + } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) { + is_nan = is_zero && sign; + } - sign = i >> 15; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xff && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0xff && mysignificand != 0) { + if (is_nan) { category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 127; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -126; - else - *significandParts() |= 0x80; // integer bit + exponent = ::exponentNaN(S); + std::copy_n(mysignificand.begin(), mysignificand.size(), + significandParts()); + return; + } + + if (is_zero) { + makeZero(sign); + return; } + + category = fcNormal; + exponent = myexponent - bias; + std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts()); + if (myexponent == 0) // denormal + exponent = S.minExponent; + else + significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit } -void IEEEFloat::initFromHalfAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 10) & 
0x1f; - uint32_t mysignificand = i & 0x3ff; +void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEquad>(api); +} - initialize(&semIEEEhalf); - assert(partCount()==1); +void IEEEFloat::initFromDoubleAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEdouble>(api); +} - sign = i >> 15; - if (myexponent==0 && mysignificand==0) { - makeZero(sign); - } else if (myexponent==0x1f && mysignificand==0) { - makeInf(sign); - } else if (myexponent==0x1f && mysignificand!=0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; //bias - *significandParts() = mysignificand; - if (myexponent==0) // denormal - exponent = -14; - else - *significandParts() |= 0x400; // integer bit - } +void IEEEFloat::initFromFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEsingle>(api); } -void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 2) & 0x1f; - uint32_t mysignificand = i & 0x3; +void IEEEFloat::initFromBFloatAPInt(const APInt &api) { + initFromIEEEAPInt<semBFloat>(api); +} - initialize(&semFloat8E5M2); - assert(partCount() == 1); +void IEEEFloat::initFromHalfAPInt(const APInt &api) { + initFromIEEEAPInt<semIEEEhalf>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0x1f && mysignificand == 0) { - makeInf(sign); - } else if (myexponent == 0x1f && mysignificand != 0) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 15; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -14; - else - *significandParts() |= 0x4; // integer bit - } +void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2>(api); +} + +void 
IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E5M2FNUZ>(api); } void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) { - uint32_t i = (uint32_t)*api.getRawData(); - uint32_t myexponent = (i >> 3) & 0xf; - uint32_t mysignificand = i & 0x7; + initFromIEEEAPInt<semFloat8E4M3FN>(api); +} - initialize(&semFloat8E4M3FN); - assert(partCount() == 1); +void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3FNUZ>(api); +} - sign = i >> 7; - if (myexponent == 0 && mysignificand == 0) { - makeZero(sign); - } else if (myexponent == 0xf && mysignificand == 7) { - category = fcNaN; - exponent = exponentNaN(); - *significandParts() = mysignificand; - } else { - category = fcNormal; - exponent = myexponent - 7; // bias - *significandParts() = mysignificand; - if (myexponent == 0) // denormal - exponent = -6; - else - *significandParts() |= 0x8; // integer bit - } +void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api); +} + +void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { + initFromIEEEAPInt<semFloatTF32>(api); } /// Treat api as containing the bits of a floating point number. @@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromPPCDoubleDoubleAPInt(api); if (Sem == &semFloat8E5M2) return initFromFloat8E5M2APInt(api); + if (Sem == &semFloat8E5M2FNUZ) + return initFromFloat8E5M2FNUZAPInt(api); if (Sem == &semFloat8E4M3FN) return initFromFloat8E4M3FNAPInt(api); + if (Sem == &semFloat8E4M3FNUZ) + return initFromFloat8E4M3FNUZAPInt(api); + if (Sem == &semFloat8E4M3B11FNUZ) + return initFromFloat8E4M3B11FNUZAPInt(api); + if (Sem == &semFloatTF32) + return initFromFloatTF32APInt(api); llvm_unreachable(nullptr); } @@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) { ? 
(~integerPart(0) >> NumUnusedHighBits) : 0; - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly && + semantics->nanEncoding == fltNanEncoding::AllOnes) significand[0] &= ~integerPart(1); } @@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, } // Ignore trailing binary zeros. - int trailingZeros = significand.countTrailingZeros(); + int trailingZeros = significand.countr_zero(); exp += trailingZeros; significand.lshrInPlace(trailingZeros); @@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { APInt::tcSet(significandParts(), 0, partCount()); category = fcZero; exponent = 0; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) + sign = false; break; } @@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { } APFloatBase::ExponentType IEEEFloat::exponentNaN() const { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) - return semantics->maxExponent; - return semantics->maxExponent + 1; + return ::exponentNaN(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentInf() const { - return semantics->maxExponent + 1; + return ::exponentInf(*semantics); } APFloatBase::ExponentType IEEEFloat::exponentZero() const { - return semantics->minExponent - 1; + return ::exponentZero(*semantics); } void IEEEFloat::makeInf(bool Negative) { @@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) { void IEEEFloat::makeZero(bool Negative) { category = fcZero; sign = Negative; + if (semantics->nanEncoding == fltNanEncoding::NegativeZero) { + // Merge negative zero to positive because 0b10000...000 is used for NaN + sign = false; + } exponent = exponentZero(); APInt::tcSet(significandParts(), 0, partCount()); } @@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) { int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1; // Clamp 
to one past the range ends to let normalize handle overlflow. - X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement); + X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement); X.normalize(RoundingMode, lfExactlyZero); if (X.isNaN()) X.makeQuiet(); @@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S) consumeError(StatusOrErr.takeError()); } +FPClassTest APFloat::classify() const { + if (isZero()) + return isNegative() ? fcNegZero : fcPosZero; + if (isNormal()) + return isNegative() ? fcNegNormal : fcPosNormal; + if (isDenormal()) + return isNegative() ? fcNegSubnormal : fcPosSubnormal; + if (isInfinity()) + return isNegative() ? fcNegInf : fcPosInf; + assert(isNaN() && "Other class of FP constant"); + return isSignaling() ? fcSNan : fcQNan; +} + APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo) { if (&getSemantics() == &ToSemantics) { diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index afe7478a8b2a..05b1526da95f 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/bit.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -68,7 +69,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { if (r < radix) return r; - return -1U; + return UINT_MAX; } @@ -164,6 +165,14 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(U.pVal[i]); } +bool APInt::isAligned(Align A) const { + if (isZero()) + return true; + const unsigned TrailingZeroes = countr_zero(); + const unsigned MinimumTrailingZeroes = Log2(A); + return TrailingZeroes >= MinimumTrailingZeroes; +} + /// Prefix increment operator. Increments the APInt by one. 
APInt& APInt::operator++() { if (isSingleWord()) @@ -479,7 +488,6 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const { - assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && "Illegal bit extraction"); assert(numBits <= 64 && "Illegal bit extraction"); @@ -626,7 +634,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { if (V == 0) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingZeros(V); + Count += llvm::countl_zero(V); break; } } @@ -646,13 +654,13 @@ unsigned APInt::countLeadingOnesSlowCase() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); + unsigned Count = llvm::countl_one(U.pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { if (U.pVal[i] == WORDTYPE_MAX) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingOnes(U.pVal[i]); + Count += llvm::countl_one(U.pVal[i]); break; } } @@ -666,7 +674,7 @@ unsigned APInt::countTrailingZerosSlowCase() const { for (; i < getNumWords() && U.pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingZeros(U.pVal[i]); + Count += llvm::countr_zero(U.pVal[i]); return std::min(Count, BitWidth); } @@ -676,7 +684,7 @@ unsigned APInt::countTrailingOnesSlowCase() const { for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingOnes(U.pVal[i]); + Count += llvm::countr_one(U.pVal[i]); assert(Count <= BitWidth); return Count; } @@ -707,18 +715,18 @@ bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); if (BitWidth == 16) - return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); + return 
APInt(BitWidth, llvm::byteswap<uint16_t>(U.VAL)); if (BitWidth == 32) - return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); + return APInt(BitWidth, llvm::byteswap<uint32_t>(U.VAL)); if (BitWidth <= 64) { - uint64_t Tmp1 = ByteSwap_64(U.VAL); + uint64_t Tmp1 = llvm::byteswap<uint64_t>(U.VAL); Tmp1 >>= (64 - BitWidth); return APInt(BitWidth, Tmp1); } APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); for (unsigned I = 0, N = getNumWords(); I != N; ++I) - Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); + Result.U.pVal[I] = llvm::byteswap<uint64_t>(U.pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; @@ -767,8 +775,8 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { // Count common powers of 2 and remove all other powers of 2. unsigned Pow2; { - unsigned Pow2_A = A.countTrailingZeros(); - unsigned Pow2_B = B.countTrailingZeros(); + unsigned Pow2_A = A.countr_zero(); + unsigned Pow2_B = B.countr_zero(); if (Pow2_A > Pow2_B) { A.lshrInPlace(Pow2_A - Pow2_B); Pow2 = Pow2_B; @@ -789,10 +797,10 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { while (A != B) { if (A.ugt(B)) { A -= B; - A.lshrInPlace(A.countTrailingZeros() - Pow2); + A.lshrInPlace(A.countr_zero() - Pow2); } else { B -= A; - B.lshrInPlace(B.countTrailingZeros() - Pow2); + B.lshrInPlace(B.countr_zero() - Pow2); } } @@ -1318,7 +1326,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, // and v so that its high bits are shifted to the top of v's range without // overflow. Note that this can require an extra word in u so that u must // be of length m+n+1. 
- unsigned shift = countLeadingZeros(v[n-1]); + unsigned shift = llvm::countl_zero(v[n - 1]); uint32_t v_carry = 0; uint32_t u_carry = 0; if (shift) { @@ -1967,7 +1975,7 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { } APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { - if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { + if (countl_zero() + RHS.countl_zero() + 2 <= BitWidth) { Overflow = true; return *this * RHS; } @@ -1984,24 +1992,32 @@ APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { } APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { - Overflow = ShAmt.uge(getBitWidth()); + return sshl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow); +} + +APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { + Overflow = ShAmt >= getBitWidth(); if (Overflow) return APInt(BitWidth, 0); if (isNonNegative()) // Don't allow sign change. - Overflow = ShAmt.uge(countLeadingZeros()); + Overflow = ShAmt >= countl_zero(); else - Overflow = ShAmt.uge(countLeadingOnes()); + Overflow = ShAmt >= countl_one(); return *this << ShAmt; } APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { - Overflow = ShAmt.uge(getBitWidth()); + return ushl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow); +} + +APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const { + Overflow = ShAmt >= getBitWidth(); if (Overflow) return APInt(BitWidth, 0); - Overflow = ShAmt.ugt(countLeadingZeros()); + Overflow = ShAmt > countl_zero(); return *this << ShAmt; } @@ -2067,6 +2083,10 @@ APInt APInt::umul_sat(const APInt &RHS) const { } APInt APInt::sshl_sat(const APInt &RHS) const { + return sshl_sat(RHS.getLimitedValue(getBitWidth())); +} + +APInt APInt::sshl_sat(unsigned RHS) const { bool Overflow; APInt Res = sshl_ov(RHS, Overflow); if (!Overflow) @@ -2077,6 +2097,10 @@ APInt APInt::sshl_sat(const APInt &RHS) const { } APInt APInt::ushl_sat(const APInt &RHS) const { + return 
ushl_sat(RHS.getLimitedValue(getBitWidth())); +} + +APInt APInt::ushl_sat(unsigned RHS) const { bool Overflow; APInt Res = ushl_ov(RHS, Overflow); if (!Overflow) @@ -2136,8 +2160,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { this->negate(); } -void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, - bool Signed, bool formatAsCLiteral) const { +void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, + bool formatAsCLiteral, bool UpperCase) const { assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || Radix == 36) && "Radix should be 2, 8, 10, 16, or 36!"); @@ -2173,7 +2197,9 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, return; } - static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz" + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + const char *Digits = BothDigits + (UpperCase ? 36 : 0); if (isSingleWord()) { char Buffer[65]; @@ -2290,14 +2316,6 @@ static inline APInt::WordType highHalf(APInt::WordType part) { return part >> (APInt::APINT_BITS_PER_WORD / 2); } -/// Returns the bit number of the most significant set bit of a part. -/// If the input number has no bits set -1U is returned. -static unsigned partMSB(APInt::WordType value) { return findLastSet(value); } - -/// Returns the bit number of the least significant set bit of a part. If the -/// input number has no bits set -1U is returned. -static unsigned partLSB(APInt::WordType value) { return findFirstSet(value); } - /// Sets the least significant part of a bignum to the input value, and zeroes /// out higher parts. void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { @@ -2338,32 +2356,33 @@ void APInt::tcClearBit(WordType *parts, unsigned bit) { } /// Returns the bit number of the least significant set bit of a number. If the -/// input number has no bits set -1U is returned. 
+/// input number has no bits set UINT_MAX is returned. unsigned APInt::tcLSB(const WordType *parts, unsigned n) { for (unsigned i = 0; i < n; i++) { if (parts[i] != 0) { - unsigned lsb = partLSB(parts[i]); + unsigned lsb = llvm::countr_zero(parts[i]); return lsb + i * APINT_BITS_PER_WORD; } } - return -1U; + return UINT_MAX; } /// Returns the bit number of the most significant set bit of a number. -/// If the input number has no bits set -1U is returned. +/// If the input number has no bits set UINT_MAX is returned. unsigned APInt::tcMSB(const WordType *parts, unsigned n) { do { --n; if (parts[n] != 0) { - unsigned msb = partMSB(parts[n]); + static_assert(sizeof(parts[n]) <= sizeof(uint64_t)); + unsigned msb = llvm::Log2_64(parts[n]); return msb + n * APINT_BITS_PER_WORD; } } while (n); - return -1U; + return UINT_MAX; } /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to @@ -2961,7 +2980,7 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); if (A == B) return std::nullopt; - return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); + return A.getBitWidth() - ((A ^ B).countl_zero() + 1); } APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth, diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp index b65b6824eaf8..5a9f44f304a2 100644 --- a/llvm/lib/Support/APSInt.cpp +++ b/llvm/lib/Support/APSInt.cpp @@ -25,7 +25,7 @@ APSInt::APSInt(StringRef Str) { unsigned NumBits = ((Str.size() * 64) / 19) + 2; APInt Tmp(NumBits, Str, /*radix=*/10); if (Str[0] == '-') { - unsigned MinBits = Tmp.getMinSignedBits(); + unsigned MinBits = Tmp.getSignificantBits(); if (MinBits < NumBits) Tmp = Tmp.trunc(std::max<unsigned>(1, MinBits)); *this = APSInt(Tmp, /*isUnsigned=*/false); diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp deleted file mode 100644 index 
187d5be00dae..000000000000 --- a/llvm/lib/Support/AddressRanges.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/AddressRanges.h" -#include "llvm/ADT/STLExtras.h" -#include <inttypes.h> - -using namespace llvm; - -AddressRanges::Collection::const_iterator -AddressRanges::insert(AddressRange Range) { - if (Range.size() == 0) - return Ranges.end(); - - auto It = llvm::upper_bound(Ranges, Range); - auto It2 = It; - while (It2 != Ranges.end() && It2->start() <= Range.end()) - ++It2; - if (It != It2) { - Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; - It = Ranges.erase(It, It2); - } - if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { - --It; - *It = {It->start(), std::max(It->end(), Range.end())}; - return It; - } - - return Ranges.insert(It, Range); -} - -AddressRanges::Collection::const_iterator -AddressRanges::find(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Addr; }); - - if (It == Ranges.begin()) - return Ranges.end(); - - --It; - if (Addr >= It->end()) - return Ranges.end(); - - return It; -} - -AddressRanges::Collection::const_iterator -AddressRanges::find(AddressRange Range) const { - if (Range.size() == 0) - return Ranges.end(); - - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Range.start(); }); - - if (It == Ranges.begin()) - return Ranges.end(); - - --It; - if (Range.end() > It->end()) - return Ranges.end(); - - return It; -} diff --git 
a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S index 449e07492832..69fc0936d73c 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S index bb58d2ae64b1..5ad1c641a7fc 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global _blake3_hash_many_avx2 .global blake3_hash_many_avx2 diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm index 352298edd2e8..46bad1d98f38 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm @@ -1,11 +1,11 @@ -public _blake3_hash_many_avx2 -public blake3_hash_many_avx2 +public _llvm_blake3_hash_many_avx2 +public llvm_blake3_hash_many_avx2 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_avx2 PROC -_blake3_hash_many_avx2 PROC +llvm_blake3_hash_many_avx2 PROC +_llvm_blake3_hash_many_avx2 PROC push r15 push r14 push r13 @@ -1785,8 +1785,8 @@ endroundloop1: vmovdqu xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_avx2 ENDP -blake3_hash_many_avx2 ENDP +_llvm_blake3_hash_many_avx2 ENDP +llvm_blake3_hash_many_avx2 ENDP _TEXT ENDS _RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST' diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S index 3afc0e2250e2..f04a135dd1bc 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S +++ 
b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S index e10b9f36cbcc..53c586141fbe 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global _blake3_hash_many_avx512 diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm index b19efbaaeb36..f13d1b260ab8 100644 --- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_avx512 -public blake3_hash_many_avx512 -public blake3_compress_in_place_avx512 -public _blake3_compress_in_place_avx512 -public blake3_compress_xof_avx512 -public _blake3_compress_xof_avx512 +public _llvm_blake3_hash_many_avx512 +public llvm_blake3_hash_many_avx512 +public llvm_blake3_compress_in_place_avx512 +public _llvm_blake3_compress_in_place_avx512 +public llvm_blake3_compress_xof_avx512 +public _llvm_blake3_compress_xof_avx512 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_avx512 PROC -_blake3_hash_many_avx512 PROC +llvm_blake3_hash_many_avx512 PROC +_llvm_blake3_hash_many_avx512 PROC push r15 push r14 push r13 @@ -2404,12 +2404,12 @@ endroundloop1: vmovdqu xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_avx512 ENDP -blake3_hash_many_avx512 ENDP +_llvm_blake3_hash_many_avx512 ENDP +llvm_blake3_hash_many_avx512 ENDP ALIGN 16 -blake3_compress_in_place_avx512 PROC -_blake3_compress_in_place_avx512 PROC +llvm_blake3_compress_in_place_avx512 PROC 
+_llvm_blake3_compress_in_place_avx512 PROC sub rsp, 72 vmovdqa xmmword ptr [rsp], xmm6 vmovdqa xmmword ptr [rsp+10H], xmm7 @@ -2498,12 +2498,12 @@ _blake3_compress_in_place_avx512 PROC vmovdqa xmm9, xmmword ptr [rsp+30H] add rsp, 72 ret -_blake3_compress_in_place_avx512 ENDP -blake3_compress_in_place_avx512 ENDP +_llvm_blake3_compress_in_place_avx512 ENDP +llvm_blake3_compress_in_place_avx512 ENDP ALIGN 16 -blake3_compress_xof_avx512 PROC -_blake3_compress_xof_avx512 PROC +llvm_blake3_compress_xof_avx512 PROC +_llvm_blake3_compress_xof_avx512 PROC sub rsp, 72 vmovdqa xmmword ptr [rsp], xmm6 vmovdqa xmmword ptr [rsp+10H], xmm7 @@ -2597,8 +2597,8 @@ _blake3_compress_xof_avx512 PROC vmovdqa xmm9, xmmword ptr [rsp+30H] add rsp, 72 ret -_blake3_compress_xof_avx512 ENDP -blake3_compress_xof_avx512 ENDP +_llvm_blake3_compress_xof_avx512 ENDP +llvm_blake3_compress_xof_avx512 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h index 180d0a6eeda8..8e5456d745cd 100644 --- a/llvm/lib/Support/BLAKE3/blake3_impl.h +++ b/llvm/lib/Support/BLAKE3/blake3_impl.h @@ -11,15 +11,7 @@ // For \p LLVM_LIBRARY_VISIBILITY #include "llvm/Support/Compiler.h" -// Remove the 'llvm_' prefix for the rest of the internal implementation. 
-#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING -#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN -#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN -#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN -#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN -#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH -#define blake3_hasher llvm_blake3_hasher -#define blake3_chunk_state llvm_blake3_chunk_state +#include "llvm_blake3_prefix.h" // internal flags enum blake3_flags { diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S index 0106b13ba851..9a4f5eb7318b 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S index 8852ba5976e1..bf3b4523a9f1 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global blake3_hash_many_sse2 .global _blake3_hash_many_sse2 diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm index 507502f11a80..1069c8df4ed6 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_sse2 -public blake3_hash_many_sse2 -public blake3_compress_in_place_sse2 -public _blake3_compress_in_place_sse2 -public blake3_compress_xof_sse2 -public _blake3_compress_xof_sse2 +public _llvm_blake3_hash_many_sse2 +public llvm_blake3_hash_many_sse2 +public llvm_blake3_compress_in_place_sse2 +public 
_llvm_blake3_compress_in_place_sse2 +public llvm_blake3_compress_xof_sse2 +public _llvm_blake3_compress_xof_sse2 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_sse2 PROC -_blake3_hash_many_sse2 PROC +llvm_blake3_hash_many_sse2 PROC +_llvm_blake3_hash_many_sse2 PROC push r15 push r14 push r13 @@ -2034,11 +2034,11 @@ endroundloop1: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_sse2 ENDP -blake3_hash_many_sse2 ENDP +_llvm_blake3_hash_many_sse2 ENDP +llvm_blake3_hash_many_sse2 ENDP -blake3_compress_in_place_sse2 PROC -_blake3_compress_in_place_sse2 PROC +llvm_blake3_compress_in_place_sse2 PROC +_llvm_blake3_compress_in_place_sse2 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2164,12 +2164,12 @@ _blake3_compress_in_place_sse2 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_in_place_sse2 ENDP -blake3_compress_in_place_sse2 ENDP +_llvm_blake3_compress_in_place_sse2 ENDP +llvm_blake3_compress_in_place_sse2 ENDP ALIGN 16 -blake3_compress_xof_sse2 PROC -_blake3_compress_xof_sse2 PROC +llvm_blake3_compress_xof_sse2 PROC +_llvm_blake3_compress_xof_sse2 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2302,8 +2302,8 @@ _blake3_compress_xof_sse2 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_xof_sse2 ENDP -blake3_compress_xof_sse2 ENDP +_llvm_blake3_compress_xof_sse2 ENDP +llvm_blake3_compress_xof_sse2 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S index 4e918c5bb2cc..1be4ed744426 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S @@ -1,5 +1,7 @@ #if defined(__x86_64__) +#include "llvm_blake3_prefix.h" + #if defined(__ELF__) && defined(__linux__) .section .note.GNU-stack,"",%progbits #endif diff --git 
a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S index 60d0a4042e71..28bdf3890a29 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S @@ -1,3 +1,5 @@ +#include "llvm_blake3_prefix.h" + .intel_syntax noprefix .global blake3_hash_many_sse41 .global _blake3_hash_many_sse41 diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm index 8966c7b84406..770935372cd9 100644 --- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm +++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm @@ -1,15 +1,15 @@ -public _blake3_hash_many_sse41 -public blake3_hash_many_sse41 -public blake3_compress_in_place_sse41 -public _blake3_compress_in_place_sse41 -public blake3_compress_xof_sse41 -public _blake3_compress_xof_sse41 +public _llvm_blake3_hash_many_sse41 +public llvm_blake3_hash_many_sse41 +public llvm_blake3_compress_in_place_sse41 +public _llvm_blake3_compress_in_place_sse41 +public llvm_blake3_compress_xof_sse41 +public _llvm_blake3_compress_xof_sse41 _TEXT SEGMENT ALIGN(16) 'CODE' ALIGN 16 -blake3_hash_many_sse41 PROC -_blake3_hash_many_sse41 PROC +llvm_blake3_hash_many_sse41 PROC +_llvm_blake3_hash_many_sse41 PROC push r15 push r14 push r13 @@ -1797,11 +1797,11 @@ endroundloop1: movups xmmword ptr [rbx], xmm0 movups xmmword ptr [rbx+10H], xmm1 jmp unwind -_blake3_hash_many_sse41 ENDP -blake3_hash_many_sse41 ENDP +_llvm_blake3_hash_many_sse41 ENDP +llvm_blake3_hash_many_sse41 ENDP -blake3_compress_in_place_sse41 PROC -_blake3_compress_in_place_sse41 PROC +llvm_blake3_compress_in_place_sse41 PROC +_llvm_blake3_compress_in_place_sse41 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -1916,12 +1916,12 @@ _blake3_compress_in_place_sse41 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret 
-_blake3_compress_in_place_sse41 ENDP -blake3_compress_in_place_sse41 ENDP +_llvm_blake3_compress_in_place_sse41 ENDP +llvm_blake3_compress_in_place_sse41 ENDP ALIGN 16 -blake3_compress_xof_sse41 PROC -_blake3_compress_xof_sse41 PROC +llvm_blake3_compress_xof_sse41 PROC +_llvm_blake3_compress_xof_sse41 PROC sub rsp, 120 movdqa xmmword ptr [rsp], xmm6 movdqa xmmword ptr [rsp+10H], xmm7 @@ -2043,8 +2043,8 @@ _blake3_compress_xof_sse41 PROC movdqa xmm15, xmmword ptr [rsp+60H] add rsp, 120 ret -_blake3_compress_xof_sse41 ENDP -blake3_compress_xof_sse41 ENDP +_llvm_blake3_compress_xof_sse41 ENDP +llvm_blake3_compress_xof_sse41 ENDP _TEXT ENDS diff --git a/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h new file mode 100644 index 000000000000..3cee3691e4cf --- /dev/null +++ b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h @@ -0,0 +1,41 @@ +#ifndef LLVM_BLAKE3_PREFIX_H +#define LLVM_BLAKE3_PREFIX_H + +#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING +#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN +#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN +#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN +#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN +#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH +#define blake3_hasher llvm_blake3_hasher +#define blake3_chunk_state llvm_blake3_chunk_state +#define blake3_compress_in_place llvm_blake3_compress_in_place +#define blake3_compress_xof llvm_blake3_compress_xof +#define blake3_hash_many llvm_blake3_hash_many +#define blake3_simd_degree llvm_blake3_simd_degree +#define blake3_compress_in_place_portable llvm_blake3_compress_in_place_portable +#define blake3_compress_xof_portable llvm_blake3_compress_xof_portable +#define blake3_hash_many_portable llvm_blake3_hash_many_portable +#define blake3_compress_in_place_sse2 llvm_blake3_compress_in_place_sse2 +#define _blake3_compress_in_place_sse2 _llvm_blake3_compress_in_place_sse2 +#define blake3_compress_xof_sse2 llvm_blake3_compress_xof_sse2 +#define 
_blake3_compress_xof_sse2 _llvm_blake3_compress_xof_sse2 +#define blake3_hash_many_sse2 llvm_blake3_hash_many_sse2 +#define _blake3_hash_many_sse2 _llvm_blake3_hash_many_sse2 +#define blake3_compress_in_place_sse41 llvm_blake3_compress_in_place_sse41 +#define _blake3_compress_in_place_sse41 _llvm_blake3_compress_in_place_sse41 +#define blake3_compress_xof_sse41 llvm_blake3_compress_xof_sse41 +#define _blake3_compress_xof_sse41 _llvm_blake3_compress_xof_sse41 +#define blake3_hash_many_sse41 llvm_blake3_hash_many_sse41 +#define _blake3_hash_many_sse41 _llvm_blake3_hash_many_sse41 +#define blake3_hash_many_avx2 llvm_blake3_hash_many_avx2 +#define _blake3_hash_many_avx2 _llvm_blake3_hash_many_avx2 +#define blake3_compress_in_place_avx512 llvm_blake3_compress_in_place_avx512 +#define _blake3_compress_in_place_avx512 _llvm_blake3_compress_in_place_avx512 +#define blake3_compress_xof_avx512 llvm_blake3_compress_xof_avx512 +#define _blake3_compress_xof_avx512 _llvm_blake3_compress_xof_avx512 +#define blake3_hash_many_avx512 llvm_blake3_hash_many_avx512 +#define _blake3_hash_many_avx512 _llvm_blake3_hash_many_avx512 +#define blake3_hash_many_neon llvm_blake3_hash_many_neon + +#endif /* LLVM_BLAKE3_PREFIX_H */ diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp new file mode 100644 index 000000000000..113e9484f528 --- /dev/null +++ b/llvm/lib/Support/BalancedPartitioning.cpp @@ -0,0 +1,337 @@ +//===- BalancedPartitioning.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements BalancedPartitioning, a recursive balanced graph +// partitioning algorithm. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BalancedPartitioning.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ThreadPool.h" + +using namespace llvm; +#define DEBUG_TYPE "balanced-partitioning" + +void BPFunctionNode::dump(raw_ostream &OS) const { + OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id, + make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket); +} + +template <typename Func> +void BalancedPartitioning::BPThreadPool::async(Func &&F) { +#if LLVM_ENABLE_THREADS + // This new thread could spawn more threads, so mark it as active + ++NumActiveThreads; + TheThreadPool.async([=]() { + // Run the task + F(); + + // This thread will no longer spawn new threads, so mark it as inactive + if (--NumActiveThreads == 0) { + // There are no more active threads, so mark as finished and notify + { + std::unique_lock<std::mutex> lock(mtx); + assert(!IsFinishedSpawning); + IsFinishedSpawning = true; + } + cv.notify_one(); + } + }); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +void BalancedPartitioning::BPThreadPool::wait() { +#if LLVM_ENABLE_THREADS + // TODO: We could remove the mutex and condition variable and use + // std::atomic::wait() instead, but that isn't available until C++20 + { + std::unique_lock<std::mutex> lock(mtx); + cv.wait(lock, [&]() { return IsFinishedSpawning; }); + assert(IsFinishedSpawning && NumActiveThreads == 0); + } + // Now we can call ThreadPool::wait() since all tasks have been submitted + TheThreadPool.wait(); +#else + llvm_unreachable("threads are disabled"); +#endif +} + +BalancedPartitioning::BalancedPartitioning( + const BalancedPartitioningConfig &Config) + : Config(Config) { + // Pre-computing log2 values + Log2Cache[0] = 0.0; + for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) + Log2Cache[I] = std::log2(I); +} + 
+void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { + LLVM_DEBUG( + dbgs() << format( + "Partitioning %d nodes using depth %d and %d iterations per split\n", + Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); + std::optional<BPThreadPool> TP; +#if LLVM_ENABLE_THREADS + ThreadPool TheThreadPool; + if (Config.TaskSplitDepth > 1) + TP.emplace(TheThreadPool); +#endif + + // Record the input order + for (unsigned I = 0; I < Nodes.size(); I++) + Nodes[I].InputOrderIndex = I; + + auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end()); + auto BisectTask = [=, &TP]() { + bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); + }; + if (TP) { + TP->async(std::move(BisectTask)); + TP->wait(); + } else { + BisectTask(); + } + + llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) { + return L.Bucket < R.Bucket; + }); + + LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n"); +} + +void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned RootBucket, + unsigned Offset, + std::optional<BPThreadPool> &TP) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { + // We've reach the lowest level of the recursion tree. Fall back to the + // original order and assign to buckets. 
+ llvm::stable_sort(Nodes, [](const auto &L, const auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + for (auto &N : Nodes) + N.Bucket = Offset++; + return; + } + + LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n", + NumNodes, RootBucket)); + + std::mt19937 RNG(RootBucket); + + unsigned LeftBucket = 2 * RootBucket; + unsigned RightBucket = 2 * RootBucket + 1; + + // Split into two and assign to the left and right buckets + split(Nodes, LeftBucket); + + runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG); + + // Split nodes wrt the resulting buckets + auto NodesMid = + llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; }); + unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid); + + auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid); + auto RightNodes = llvm::make_range(NodesMid, Nodes.end()); + + auto LeftRecTask = [=, &TP]() { + bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP); + }; + auto RightRecTask = [=, &TP]() { + bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP); + }; + + if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { + TP->async(std::move(LeftRecTask)); + TP->async(std::move(RightRecTask)); + } else { + LeftRecTask(); + RightRecTask(); + } +} + +void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, + unsigned RecDepth, unsigned LeftBucket, + unsigned RightBucket, + std::mt19937 &RNG) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + ++UtilityNodeDegree[UN]; + // Remove utility nodes if they have just one edge or are connected to all + // functions + for (auto &N : Nodes) + llvm::erase_if(N.UtilityNodes, [&](auto &UN) { + return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes; + }); + + // Renumber utility nodes so they can be used to index into 
Signatures + DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + if (!UtilityNodeIndex.count(UN)) + UtilityNodeIndex[UN] = UtilityNodeIndex.size(); + for (auto &N : Nodes) + for (auto &UN : N.UtilityNodes) + UN = UtilityNodeIndex[UN]; + + // Initialize signatures + SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); + for (auto &N : Nodes) { + for (auto &UN : N.UtilityNodes) { + assert(UN < Signatures.size()); + if (N.Bucket == LeftBucket) { + Signatures[UN].LeftCount++; + } else { + Signatures[UN].RightCount++; + } + } + } + + for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { + unsigned NumMovedNodes = + runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); + if (NumMovedNodes == 0) + break; + } +} + +unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Init signature cost caches + for (auto &Signature : Signatures) { + if (Signature.CachedGainIsValid) + continue; + unsigned L = Signature.LeftCount; + unsigned R = Signature.RightCount; + assert((L > 0 || R > 0) && "incorrect signature"); + float Cost = logCost(L, R); + Signature.CachedGainLR = 0.f; + Signature.CachedGainRL = 0.f; + if (L > 0) + Signature.CachedGainLR = Cost - logCost(L - 1, R + 1); + if (R > 0) + Signature.CachedGainRL = Cost - logCost(L + 1, R - 1); + Signature.CachedGainIsValid = true; + } + + // Compute move gains + typedef std::pair<float, BPFunctionNode *> GainPair; + std::vector<GainPair> Gains; + for (auto &N : Nodes) { + bool FromLeftToRight = (N.Bucket == LeftBucket); + float Gain = moveGain(N, FromLeftToRight, Signatures); + Gains.push_back(std::make_pair(Gain, &N)); + } + + // Collect left and right gains + auto LeftEnd = llvm::partition( + Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; }); + auto LeftRange = llvm::make_range(Gains.begin(), 
LeftEnd); + auto RightRange = llvm::make_range(LeftEnd, Gains.end()); + + // Sort gains in descending order + auto LargerGain = [](const auto &L, const auto &R) { + return L.first > R.first; + }; + llvm::stable_sort(LeftRange, LargerGain); + llvm::stable_sort(RightRange, LargerGain); + + unsigned NumMovedDataVertices = 0; + for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) { + auto &[LeftGain, LeftNode] = LeftPair; + auto &[RightGain, RightNode] = RightPair; + // Stop when the gain is no longer beneficial + if (LeftGain + RightGain <= 0.f) + break; + // Try to exchange the nodes between buckets + if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG)) + ++NumMovedDataVertices; + } + return NumMovedDataVertices; +} + +bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, + unsigned LeftBucket, + unsigned RightBucket, + SignaturesT &Signatures, + std::mt19937 &RNG) const { + // Sometimes we skip the move. This helps to escape local optima + if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= + Config.SkipProbability) + return false; + + bool FromLeftToRight = (N.Bucket == LeftBucket); + // Update the current bucket + N.Bucket = (FromLeftToRight ? 
RightBucket : LeftBucket); + + // Update signatures and invalidate gain cache + if (FromLeftToRight) { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount--; + Signature.RightCount++; + Signature.CachedGainIsValid = false; + } + } else { + for (auto &UN : N.UtilityNodes) { + auto &Signature = Signatures[UN]; + Signature.LeftCount++; + Signature.RightCount--; + Signature.CachedGainIsValid = false; + } + } + return true; +} + +void BalancedPartitioning::split(const FunctionNodeRange Nodes, + unsigned StartBucket) const { + unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); + auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; + + std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) { + return L.InputOrderIndex < R.InputOrderIndex; + }); + + for (auto &N : llvm::make_range(Nodes.begin(), NodesMid)) + N.Bucket = StartBucket; + for (auto &N : llvm::make_range(NodesMid, Nodes.end())) + N.Bucket = StartBucket + 1; +} + +float BalancedPartitioning::moveGain(const BPFunctionNode &N, + bool FromLeftToRight, + const SignaturesT &Signatures) { + float Gain = 0.f; + for (auto &UN : N.UtilityNodes) + Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR + : Signatures[UN].CachedGainRL); + return Gain; +} + +float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { + return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1)); +} + +float BalancedPartitioning::log2Cached(unsigned i) const { + return (i < LOG_CACHE_SIZE) ? 
Log2Cache[i] : std::log2(i); +} diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp index dc4ea200c7be..3d87a30a86a1 100644 --- a/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/LEB128.h" diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp index 702165ac480b..a4a1e477d940 100644 --- a/llvm/lib/Support/BlockFrequency.cpp +++ b/llvm/lib/Support/BlockFrequency.cpp @@ -12,7 +12,6 @@ #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" -#include <cassert> using namespace llvm; @@ -37,47 +36,3 @@ BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { Freq /= Prob; return Freq; } - -BlockFrequency &BlockFrequency::operator+=(BlockFrequency Freq) { - uint64_t Before = Freq.Frequency; - Frequency += Freq.Frequency; - - // If overflow, set frequency to the maximum value. - if (Frequency < Before) - Frequency = UINT64_MAX; - - return *this; -} - -BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const { - BlockFrequency NewFreq(Frequency); - NewFreq += Freq; - return NewFreq; -} - -BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) { - // If underflow, set frequency to 0. - if (Frequency <= Freq.Frequency) - Frequency = 0; - else - Frequency -= Freq.Frequency; - return *this; -} - -BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const { - BlockFrequency NewFreq(Frequency); - NewFreq -= Freq; - return NewFreq; -} - -BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { - // Frequency can never be 0 by design. - assert(Frequency != 0); - - // Shift right by count. - Frequency >>= count; - - // Saturate to 1 if we are 0. 
- Frequency |= Frequency == 0; - return *this; -} diff --git a/llvm/lib/Support/Chrono.cpp b/llvm/lib/Support/Chrono.cpp index 8c28d45d8822..859ece8f5500 100644 --- a/llvm/lib/Support/Chrono.cpp +++ b/llvm/lib/Support/Chrono.cpp @@ -74,7 +74,7 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS, continue; case 'N': // Nanoseconds, from date(1). FStream << llvm::format( - "%.6lu", (long)duration_cast<nanoseconds>(Fractional).count()); + "%.9lu", (long)duration_cast<nanoseconds>(Fractional).count()); ++I; continue; case '%': // Consume %%, so %%f parses as (%%)f not %(%f) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 66632504d6fb..d3efb8b67be5 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -208,8 +208,7 @@ public: bool HadErrors = false; if (O->hasArgStr()) { // If it's a DefaultOption, check to make sure it isn't already there. - if (O->isDefaultOption() && - SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end()) + if (O->isDefaultOption() && SC->OptionsMap.contains(O->ArgStr)) return; // Add argument to the argument map! @@ -2758,7 +2757,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) { initCommonOptions(); auto &Subs = GlobalParser->RegisteredSubCommands; (void)Subs; - assert(is_contained(Subs, &Sub)); + assert(Subs.contains(&Sub)); return Sub.OptionsMap; } diff --git a/llvm/lib/Support/ConvertEBCDIC.cpp b/llvm/lib/Support/ConvertEBCDIC.cpp new file mode 100644 index 000000000000..08eeaa52a6c9 --- /dev/null +++ b/llvm/lib/Support/ConvertEBCDIC.cpp @@ -0,0 +1,123 @@ +//===--- ConvertEBCDIC.cpp - UTF8/EBCDIC CharSet Conversion -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides utility functions for converting between EBCDIC-1047 and +/// UTF-8. +/// +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ConvertEBCDIC.h" + +using namespace llvm; + +static const unsigned char ISO88591ToIBM1047[256] = { + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x15, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, + 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 0x40, 0x5a, 0x7f, 0x7b, + 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, + 0x4c, 0x7e, 0x6e, 0x6f, 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x5f, 0x6d, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, + 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, + 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, + 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, + 0x04, 0x14, 0x3e, 0xff, 0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, + 0xbb, 0xb4, 0x9a, 0x8a, 0xb0, 0xca, 0xaf, 0xbc, 0x90, 0x8f, 0xea, 0xfa, + 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab, + 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73, + 0x78, 0x75, 0x76, 0x77, 0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, + 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xba, 0xae, 0x59, 0x44, 0x45, 0x42, 0x46, + 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, + 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 
0xe1, 0x70, 0xdd, 0xde, 0xdb, + 0xdc, 0x8d, 0x8e, 0xdf}; + +static const unsigned char IBM1047ToISO88591[256] = { + 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87, + 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, + 0x84, 0x85, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, + 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, + 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 0x26, 0xe9, 0xea, 0xeb, + 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, + 0x25, 0x5f, 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, + 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 0xd8, 0x61, 0x62, 0x63, + 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, + 0xe6, 0xb8, 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3, 0xa5, 0xb7, + 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7, + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, + 0xf6, 0xf2, 0xf3, 0xf5, 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54, + 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb, + 0xdc, 0xd9, 0xda, 0x9f}; + +std::error_code +ConverterEBCDIC::convertToEBCDIC(StringRef Source, + SmallVectorImpl<char> &Result) { + assert(Result.empty() && "Result must be empty!"); + const unsigned char *Table = ISO88591ToIBM1047; + const unsigned char *Ptr = + reinterpret_cast<const unsigned char *>(Source.data()); + size_t 
Length = Source.size(); + Result.reserve(Length); + while (Length--) { + unsigned char Ch = *Ptr++; + // Handle UTF-8 2-byte-sequences in input. + if (Ch >= 128) { + // Only two-byte sequences can be decoded. + if (Ch != 0xc2 && Ch != 0xc3) + return std::make_error_code(std::errc::illegal_byte_sequence); + // Is buffer truncated? + if (!Length) + return std::make_error_code(std::errc::invalid_argument); + unsigned char Ch2 = *Ptr++; + // Is second byte well-formed? + if ((Ch2 & 0xc0) != 0x80) + return std::make_error_code(std::errc::illegal_byte_sequence); + Ch = Ch2 | (Ch << 6); + Length--; + } + // Translate the character. + Ch = Table[Ch]; + Result.push_back(static_cast<char>(Ch)); + } + return std::error_code(); +} + +void ConverterEBCDIC::convertToUTF8(StringRef Source, + SmallVectorImpl<char> &Result) { + assert(Result.empty() && "Result must be empty!"); + + const unsigned char *Table = IBM1047ToISO88591; + const unsigned char *Ptr = + reinterpret_cast<const unsigned char *>(Source.data()); + size_t Length = Source.size(); + Result.reserve(Length); + while (Length--) { + unsigned char Ch = *Ptr++; + // Translate the character. + Ch = Table[Ch]; + // Handle UTF-8 2-byte-sequences in output. + if (Ch >= 128) { + // First byte prefixed with either 0xc2 or 0xc3. + Result.push_back(static_cast<char>(0xc0 | (Ch >> 6))); + // Second byte is either the same as the ASCII byte or ASCII byte -64. 
+ Ch = Ch & 0xbf; + } + Result.push_back(static_cast<char>(Ch)); + } +} diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp index 9bf3f8f8b897..3fa7365e72d3 100644 --- a/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -102,7 +102,7 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); for (UTF16 &I : ByteSwapped) - I = llvm::ByteSwap_16(I); + I = llvm::byteswap<uint16_t>(I); Src = &ByteSwapped[0]; SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; } @@ -160,7 +160,7 @@ bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) { if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) { ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); for (UTF32 &I : ByteSwapped) - I = llvm::ByteSwap_32(I); + I = llvm::byteswap<uint32_t>(I); Src = &ByteSwapped[0]; SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; } diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp index e96a9b59d834..f53aea177d61 100644 --- a/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/llvm/lib/Support/CrashRecoveryContext.cpp @@ -431,7 +431,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { [[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) { #if defined(_WIN32) - // SEH and VEH + // Since the exception code is actually of NTSTATUS type, we use the + // Microsoft-recommended 0xE prefix, to signify that this is a user error. + // This value is a combination of the customer field (bit 29) and severity + // field (bits 30-31) in the NTSTATUS specification. 
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL); #else // On Unix we don't need to raise an exception, we go directly to @@ -445,10 +448,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) { bool CrashRecoveryContext::isCrash(int RetCode) { #if defined(_WIN32) - // On Windows, the high bits are reserved for kernel return codes. Values - // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000 - // and up are for "errors". In practice, both are interpreted as a - // non-continuable signal. + // On Windows, the code is interpreted as NTSTATUS. The two high bits + // represent the severity. Values starting with 0x80000000 are reserved for + // "warnings"; values of 0xC0000000 and up are for "errors". In practice, both + // are interpreted as a non-continuable signal. unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28; if (Code != 0xC && Code != 8) return false; diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp index 8cf312191153..59a44f4071b5 100644 --- a/llvm/lib/Support/DataExtractor.cpp +++ b/llvm/lib/Support/DataExtractor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DataExtractor.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" diff --git a/llvm/lib/Support/DebugOptions.h b/llvm/lib/Support/DebugOptions.h index 75e557d7d8d7..db727d5a584c 100644 --- a/llvm/lib/Support/DebugOptions.h +++ b/llvm/lib/Support/DebugOptions.h @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_SUPPORT_DEBUGOPTIONS_H +#define LLVM_SUPPORT_DEBUGOPTIONS_H + namespace llvm { // These are invoked internally before parsing command line options. 
@@ -27,3 +30,5 @@ void initDebugOptions(); void initRandomSeedOptions(); } // namespace llvm + +#endif // LLVM_SUPPORT_DEBUGOPTIONS_H diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp index e7072d94e49c..8150bd83c79f 100644 --- a/llvm/lib/Support/DivisionByConstantInfo.cpp +++ b/llvm/lib/Support/DivisionByConstantInfo.cpp @@ -132,7 +132,7 @@ UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros, (Q1.ult(Delta) || (Q1 == Delta && R1.isZero()))); if (Retval.IsAdd && !D[0] && AllowEvenDivisorOptimization) { - unsigned PreShift = D.countTrailingZeros(); + unsigned PreShift = D.countr_zero(); APInt ShiftedD = D.lshr(PreShift); Retval = UnsignedDivisionByConstantInfo::get(ShiftedD, LeadingZeros + PreShift); diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp index a5a0676b1077..2e90b70dc83f 100644 --- a/llvm/lib/Support/ELFAttributeParser.cpp +++ b/llvm/lib/Support/ELFAttributeParser.cpp @@ -127,10 +127,14 @@ Error ELFAttributeParser::parseSubsection(uint32_t length) { sw->printString("Vendor", vendorName); } - // Ignore unrecognized vendor-name. - if (vendorName.lower() != vendor) - return createStringError(errc::invalid_argument, - "unrecognized vendor-name: " + vendorName); + // Handle a subsection with an unrecognized vendor-name by skipping + // over it to the next subsection. ADDENDA32 in the Arm ABI defines + // that vendor attribute sections must not affect compatibility, so + // this should always be safe. 
+ if (vendorName.lower() != vendor) { + cursor.seek(end); + return Error::success(); + } while (cursor.tell() < end) { /// Tag_File | Tag_Section | Tag_Symbol uleb128:byte-size diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp index 7f665be8db6c..60a7e536b6c5 100644 --- a/llvm/lib/Support/Errno.cpp +++ b/llvm/lib/Support/Errno.cpp @@ -55,17 +55,11 @@ std::string StrError(int errnum) { #elif HAVE_DECL_STRERROR_S // "Windows Secure API" strerror_s(buffer, MaxErrStrLen - 1, errnum); str = buffer; -#elif defined(HAVE_STRERROR) +#else // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. str = strerror(errnum); -#else - // Strange that this system doesn't even have strerror - // but, oh well, just use a generic message - raw_string_ostream stream(str); - stream << "Error #" << errnum; - stream.flush(); #endif return str; } diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp index fbe86f2b59e1..21d591530b41 100644 --- a/llvm/lib/Support/Error.cpp +++ b/llvm/lib/Support/Error.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Error.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include <system_error> @@ -70,6 +72,15 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) { }); } +/// Write all error messages (if any) in E to a string. The newline character +/// is used to separate error messages. 
+std::string toString(Error E) { + SmallVector<std::string, 2> Errors; + handleAllErrors(std::move(E), [&Errors](const ErrorInfoBase &EI) { + Errors.push_back(EI.message()); + }); + return join(Errors.begin(), Errors.end(), "\n"); +} std::error_code ErrorList::convertToErrorCode() const { return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors), @@ -149,7 +160,7 @@ void report_fatal_error(Error Err, bool GenCrashDiag) { raw_string_ostream ErrStream(ErrMsg); logAllUnhandledErrors(std::move(Err), ErrStream); } - report_fatal_error(Twine(ErrMsg)); + report_fatal_error(Twine(ErrMsg), GenCrashDiag); } } // end namespace llvm diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp index d01a41a46489..dbd6c324cf4d 100644 --- a/llvm/lib/Support/FileUtilities.cpp +++ b/llvm/lib/Support/FileUtilities.cpp @@ -169,7 +169,7 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the /// files match, 1 if they are different, and 2 if there is a file error. This -/// function differs from DiffFiles in that you can specify an absolete and +/// function differs from DiffFiles in that you can specify an absolute and /// relative FP error that is allowed to exist. 
If you specify a string to fill /// in for the error option, it will set the string to an error message if an /// error occurs, allowing the caller to distinguish between a failed diff and a @@ -267,64 +267,6 @@ int llvm::DiffFilesWithTolerance(StringRef NameA, return CompareFailed; } -void llvm::AtomicFileWriteError::log(raw_ostream &OS) const { - OS << "atomic_write_error: "; - switch (Error) { - case atomic_write_error::failed_to_create_uniq_file: - OS << "failed_to_create_uniq_file"; - return; - case atomic_write_error::output_stream_error: - OS << "output_stream_error"; - return; - case atomic_write_error::failed_to_rename_temp_file: - OS << "failed_to_rename_temp_file"; - return; - } - llvm_unreachable("unknown atomic_write_error value in " - "failed_to_rename_temp_file::log()"); -} - -llvm::Error llvm::writeFileAtomically(StringRef TempPathModel, - StringRef FinalPath, StringRef Buffer) { - return writeFileAtomically(TempPathModel, FinalPath, - [&Buffer](llvm::raw_ostream &OS) { - OS.write(Buffer.data(), Buffer.size()); - return llvm::Error::success(); - }); -} - -llvm::Error llvm::writeFileAtomically( - StringRef TempPathModel, StringRef FinalPath, - std::function<llvm::Error(llvm::raw_ostream &)> Writer) { - SmallString<128> GeneratedUniqPath; - int TempFD; - if (sys::fs::createUniqueFile(TempPathModel, TempFD, GeneratedUniqPath)) { - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::failed_to_create_uniq_file); - } - llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath); - - raw_fd_ostream OS(TempFD, /*shouldClose=*/true); - if (llvm::Error Err = Writer(OS)) { - return Err; - } - - OS.close(); - if (OS.has_error()) { - OS.clear_error(); - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::output_stream_error); - } - - if (sys::fs::rename(/*from=*/GeneratedUniqPath, /*to=*/FinalPath)) { - return llvm::make_error<AtomicFileWriteError>( - atomic_write_error::failed_to_rename_temp_file); - } - - 
RemoveTmpFileOnFail.releaseFile(); - return Error::success(); -} - Expected<FilePermissionsApplier> FilePermissionsApplier::create(StringRef InputFilename) { sys::fs::file_status Status; @@ -389,5 +331,3 @@ Error FilePermissionsApplier::apply( return Error::success(); } - -char llvm::AtomicFileWriteError::ID; diff --git a/llvm/lib/Support/FloatingPointMode.cpp b/llvm/lib/Support/FloatingPointMode.cpp new file mode 100644 index 000000000000..9543884ff46e --- /dev/null +++ b/llvm/lib/Support/FloatingPointMode.cpp @@ -0,0 +1,95 @@ +//===- FloatingPointMode.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/FloatingPointMode.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +FPClassTest llvm::fneg(FPClassTest Mask) { + FPClassTest NewMask = Mask & fcNan; + if (Mask & fcNegInf) + NewMask |= fcPosInf; + if (Mask & fcNegNormal) + NewMask |= fcPosNormal; + if (Mask & fcNegSubnormal) + NewMask |= fcPosSubnormal; + if (Mask & fcNegZero) + NewMask |= fcPosZero; + if (Mask & fcPosZero) + NewMask |= fcNegZero; + if (Mask & fcPosSubnormal) + NewMask |= fcNegSubnormal; + if (Mask & fcPosNormal) + NewMask |= fcNegNormal; + if (Mask & fcPosInf) + NewMask |= fcNegInf; + return NewMask; +} + +FPClassTest llvm::fabs(FPClassTest Mask) { + FPClassTest NewMask = Mask & fcNan; + if (Mask & fcPosZero) + NewMask |= fcZero; + if (Mask & fcPosSubnormal) + NewMask |= fcSubnormal; + if (Mask & fcPosNormal) + NewMask |= fcNormal; + if (Mask & fcPosInf) + NewMask |= fcInf; + return NewMask; +} + +// Every bitfield has a unique name and one or more aliasing names that cover +// multiple bits. 
Names should be listed in order of preference, with higher +// popcounts listed first. +// +// Bits are consumed as printed. Each field should only be represented in one +// printed field. +static constexpr std::pair<FPClassTest, StringLiteral> NoFPClassName[] = { + {fcAllFlags, "all"}, + {fcNan, "nan"}, + {fcSNan, "snan"}, + {fcQNan, "qnan"}, + {fcInf, "inf"}, + {fcNegInf, "ninf"}, + {fcPosInf, "pinf"}, + {fcZero, "zero"}, + {fcNegZero, "nzero"}, + {fcPosZero, "pzero"}, + {fcSubnormal, "sub"}, + {fcNegSubnormal, "nsub"}, + {fcPosSubnormal, "psub"}, + {fcNormal, "norm"}, + {fcNegNormal, "nnorm"}, + {fcPosNormal, "pnorm"} +}; + +raw_ostream &llvm::operator<<(raw_ostream &OS, FPClassTest Mask) { + OS << '('; + + if (Mask == fcNone) { + OS << "none)"; + return OS; + } + + ListSeparator LS(" "); + for (auto [BitTest, Name] : NoFPClassName) { + if ((Mask & BitTest) == BitTest) { + OS << LS << Name; + + // Clear the bits so we don't print any aliased names later. + Mask &= ~BitTest; + } + } + + assert(Mask == 0 && "didn't print some mask bits"); + + OS << ')'; + return OS; +} diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp index ece31b971c1c..419bf6740768 100644 --- a/llvm/lib/Support/FoldingSet.cpp +++ b/llvm/lib/Support/FoldingSet.cpp @@ -269,7 +269,7 @@ void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) { // range of 1.0 - 2.0. if(EltCount < capacity()) return; - GrowBucketCount(PowerOf2Floor(EltCount), Info); + GrowBucketCount(llvm::bit_floor(EltCount), Info); } /// FindNodeOrInsertPos - Look up the node specified by ID. 
If it exists, diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp deleted file mode 100644 index d95d84f7837e..000000000000 --- a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===----------------- ItaniumManglingCanonicalizer.cpp -------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/ItaniumManglingCanonicalizer.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Demangle/ItaniumDemangle.h" -#include "llvm/Support/Allocator.h" - -using namespace llvm; -using llvm::itanium_demangle::ForwardTemplateReference; -using llvm::itanium_demangle::Node; -using llvm::itanium_demangle::NodeKind; -using llvm::itanium_demangle::StringView; - -namespace { -struct FoldingSetNodeIDBuilder { - llvm::FoldingSetNodeID &ID; - void operator()(const Node *P) { ID.AddPointer(P); } - void operator()(StringView Str) { - ID.AddString(llvm::StringRef(Str.begin(), Str.size())); - } - template <typename T> - std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) { - ID.AddInteger((unsigned long long)V); - } - void operator()(itanium_demangle::NodeArray A) { - ID.AddInteger(A.size()); - for (const Node *N : A) - (*this)(N); - } -}; - -template<typename ...T> -void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) { - FoldingSetNodeIDBuilder Builder = {ID}; - Builder(K); - int VisitInOrder[] = { - (Builder(V), 0) ..., - 0 // Avoid empty array if there are no arguments. - }; - (void)VisitInOrder; -} - -// FIXME: Convert this to a generic lambda when possible. 
-template<typename NodeT> struct ProfileSpecificNode { - FoldingSetNodeID &ID; - template<typename ...T> void operator()(T ...V) { - profileCtor(ID, NodeKind<NodeT>::Kind, V...); - } -}; - -struct ProfileNode { - FoldingSetNodeID &ID; - template<typename NodeT> void operator()(const NodeT *N) { - N->match(ProfileSpecificNode<NodeT>{ID}); - } -}; - -template<> void ProfileNode::operator()(const ForwardTemplateReference *N) { - llvm_unreachable("should never canonicalize a ForwardTemplateReference"); -} - -void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) { - N->visit(ProfileNode{ID}); -} - -class FoldingNodeAllocator { - class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode { - public: - // 'Node' in this context names the injected-class-name of the base class. - itanium_demangle::Node *getNode() { - return reinterpret_cast<itanium_demangle::Node *>(this + 1); - } - void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); } - }; - - BumpPtrAllocator RawAlloc; - llvm::FoldingSet<NodeHeader> Nodes; - -public: - void reset() {} - - template <typename T, typename... Args> - std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) { - // FIXME: Don't canonicalize forward template references for now, because - // they contain state (the resolved template node) that's not known at their - // point of creation. - if (std::is_same<T, ForwardTemplateReference>::value) { - // Note that we don't use if-constexpr here and so we must still write - // this code in a generic form. 
- return {new (RawAlloc.Allocate(sizeof(T), alignof(T))) - T(std::forward<Args>(As)...), - true}; - } - - llvm::FoldingSetNodeID ID; - profileCtor(ID, NodeKind<T>::Kind, As...); - - void *InsertPos; - if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos)) - return {static_cast<T*>(Existing->getNode()), false}; - - if (!CreateNewNodes) - return {nullptr, true}; - - static_assert(alignof(T) <= alignof(NodeHeader), - "underaligned node header for specific node kind"); - void *Storage = - RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader)); - NodeHeader *New = new (Storage) NodeHeader; - T *Result = new (New->getNode()) T(std::forward<Args>(As)...); - Nodes.InsertNode(New, InsertPos); - return {Result, true}; - } - - template<typename T, typename... Args> - Node *makeNode(Args &&...As) { - return getOrCreateNode<T>(true, std::forward<Args>(As)...).first; - } - - void *allocateNodeArray(size_t sz) { - return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *)); - } -}; - -class CanonicalizerAllocator : public FoldingNodeAllocator { - Node *MostRecentlyCreated = nullptr; - Node *TrackedNode = nullptr; - bool TrackedNodeIsUsed = false; - bool CreateNewNodes = true; - llvm::SmallDenseMap<Node*, Node*, 32> Remappings; - - template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) { - std::pair<Node *, bool> Result = - getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...); - if (Result.second) { - // Node is new. Make a note of that. - MostRecentlyCreated = Result.first; - } else if (Result.first) { - // Node is pre-existing; check if it's in our remapping table. - if (auto *N = Remappings.lookup(Result.first)) { - Result.first = N; - assert(Remappings.find(Result.first) == Remappings.end() && - "should never need multiple remap steps"); - } - if (Result.first == TrackedNode) - TrackedNodeIsUsed = true; - } - return Result.first; - } - - /// Helper to allow makeNode to be partially-specialized on T. 
- template<typename T> struct MakeNodeImpl { - CanonicalizerAllocator &Self; - template<typename ...Args> Node *make(Args &&...As) { - return Self.makeNodeSimple<T>(std::forward<Args>(As)...); - } - }; - -public: - template<typename T, typename ...Args> Node *makeNode(Args &&...As) { - return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...); - } - - void reset() { MostRecentlyCreated = nullptr; } - - void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; } - - void addRemapping(Node *A, Node *B) { - // Note, we don't need to check whether B is also remapped, because if it - // was we would have already remapped it when building it. - Remappings.insert(std::make_pair(A, B)); - } - - bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; } - - void trackUsesOf(Node *N) { - TrackedNode = N; - TrackedNodeIsUsed = false; - } - bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; } -}; - -// FIXME: Also expand built-in substitutions? - -using CanonicalizingDemangler = - itanium_demangle::ManglingParser<CanonicalizerAllocator>; -} // namespace - -struct ItaniumManglingCanonicalizer::Impl { - CanonicalizingDemangler Demangler = {nullptr, nullptr}; -}; - -ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {} -ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; } - -ItaniumManglingCanonicalizer::EquivalenceError -ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First, - StringRef Second) { - auto &Alloc = P->Demangler.ASTAllocator; - Alloc.setCreateNewNodes(true); - - auto Parse = [&](StringRef Str) { - P->Demangler.reset(Str.begin(), Str.end()); - Node *N = nullptr; - switch (Kind) { - // A <name>, with minor extensions to allow arbitrary namespace and - // template names that can't easily be written as <name>s. - case FragmentKind::Name: - // Very special case: allow "St" as a shorthand for "3std". 
It's not - // valid as a <name> mangling, but is nonetheless the most natural - // way to name the 'std' namespace. - if (Str.size() == 2 && P->Demangler.consumeIf("St")) - N = P->Demangler.make<itanium_demangle::NameType>("std"); - // We permit substitutions to name templates without their template - // arguments. This mostly just falls out, as almost all template names - // are valid as <name>s, but we also want to parse <substitution>s as - // <name>s, even though they're not. - else if (Str.startswith("S")) - // Parse the substitution and optional following template arguments. - N = P->Demangler.parseType(); - else - N = P->Demangler.parseName(); - break; - - // A <type>. - case FragmentKind::Type: - N = P->Demangler.parseType(); - break; - - // An <encoding>. - case FragmentKind::Encoding: - N = P->Demangler.parseEncoding(); - break; - } - - // If we have trailing junk, the mangling is invalid. - if (P->Demangler.numLeft() != 0) - N = nullptr; - - // If any node was created after N, then we cannot safely remap it because - // it might already be in use by another node. - return std::make_pair(N, Alloc.isMostRecentlyCreated(N)); - }; - - Node *FirstNode, *SecondNode; - bool FirstIsNew, SecondIsNew; - - std::tie(FirstNode, FirstIsNew) = Parse(First); - if (!FirstNode) - return EquivalenceError::InvalidFirstMangling; - - Alloc.trackUsesOf(FirstNode); - std::tie(SecondNode, SecondIsNew) = Parse(Second); - if (!SecondNode) - return EquivalenceError::InvalidSecondMangling; - - // If they're already equivalent, there's nothing to do. 
- if (FirstNode == SecondNode) - return EquivalenceError::Success; - - if (FirstIsNew && !Alloc.trackedNodeIsUsed()) - Alloc.addRemapping(FirstNode, SecondNode); - else if (SecondIsNew) - Alloc.addRemapping(SecondNode, FirstNode); - else - return EquivalenceError::ManglingAlreadyUsed; - - return EquivalenceError::Success; -} - -static ItaniumManglingCanonicalizer::Key -parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling, - bool CreateNewNodes) { - Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes); - Demangler.reset(Mangling.begin(), Mangling.end()); - // Attempt demangling only for names that look like C++ mangled names. - // Otherwise, treat them as extern "C" names. We permit the latter to - // be remapped by (eg) - // encoding 6memcpy 7memmove - // consistent with how they are encoded as local-names inside a C++ mangling. - Node *N; - if (Mangling.startswith("_Z") || Mangling.startswith("__Z") || - Mangling.startswith("___Z") || Mangling.startswith("____Z")) - N = Demangler.parse(); - else - N = Demangler.make<itanium_demangle::NameType>( - StringView(Mangling.data(), Mangling.size())); - return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N); -} - -ItaniumManglingCanonicalizer::Key -ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) { - return parseMaybeMangledName(P->Demangler, Mangling, true); -} - -ItaniumManglingCanonicalizer::Key -ItaniumManglingCanonicalizer::lookup(StringRef Mangling) { - return parseMaybeMangledName(P->Demangler, Mangling, false); -} diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp index 0e7f7bf1d999..c672a43b033e 100644 --- a/llvm/lib/Support/JSON.cpp +++ b/llvm/lib/Support/JSON.cpp @@ -8,12 +8,14 @@ #include "llvm/Support/JSON.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" #include 
"llvm/Support/NativeFormatting.h" +#include "llvm/Support/raw_ostream.h" #include <cctype> +#include <cerrno> #include <optional> namespace llvm { diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 745c46fb6ffb..097c22d33dd1 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -105,7 +105,7 @@ KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const { KnownBits KnownBits::makeGE(const APInt &Val) const { // Count the number of leading bit positions where our underlying value is // known to be less than or equal to Val. - unsigned N = (Zero | Val).countLeadingOnes(); + unsigned N = (Zero | Val).countl_one(); // For each of those bit positions, if Val has a 1 in that bit then our // underlying value must also have a 1. @@ -129,7 +129,7 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { // are common to these two values are also known in the result. KnownBits L = LHS.makeGE(RHS.getMinValue()); KnownBits R = RHS.makeGE(LHS.getMinValue()); - return KnownBits::commonBits(L, R); + return L.intersectWith(R); } KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { @@ -164,169 +164,189 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { return Flip(umax(Flip(LHS), Flip(RHS))); } -KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) { +static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) { + if (isPowerOf2_32(BitWidth)) + return MaxValue.extractBitsAsZExtValue(Log2_32(BitWidth), 0); + // This is only an approximate upper bound. 
+ return MaxValue.getLimitedValue(BitWidth - 1); +} + +KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW, + bool NSW, bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known; + bool ShiftedOutZero, ShiftedOutOne; + Known.Zero = LHS.Zero.ushl_ov(ShiftAmt, ShiftedOutZero); + Known.Zero.setLowBits(ShiftAmt); + Known.One = LHS.One.ushl_ov(ShiftAmt, ShiftedOutOne); + + // All cases returning poison have been handled by MaxShiftAmount already. + if (NSW) { + if (NUW && ShiftAmt != 0) + // NUW means we can assume anything shifted out was a zero. + ShiftedOutZero = true; + + if (ShiftedOutZero) + Known.makeNonNegative(); + else if (ShiftedOutOne) + Known.makeNegative(); + } + return Known; + }; - // If the shift amount is a valid constant then transform LHS directly. - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero <<= Shift; - Known.One <<= Shift; - // Low bits are known zero. - Known.Zero.setLowBits(Shift); + // Fast path for a common case when LHS is completely unknown. + KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + Known.Zero.setLowBits(MinShiftAmount); + if (NUW && NSW && MinShiftAmount != 0) + Known.makeNonNegative(); return Known; } - // No matter the shift amount, the trailing zeros will stay zero. - unsigned MinTrailingZeros = LHS.countMinTrailingZeros(); - - // Minimum shift amount low bits are known zero. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - MinTrailingZeros += MinShiftAmount.getZExtValue(); - MinTrailingZeros = std::min(MinTrailingZeros, BitWidth); + // Determine maximum shift amount, taking NUW/NSW flags into account. 
+ APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + if (NUW && NSW) + MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros() - 1); + if (NUW) + MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros()); + if (NSW) + MaxShiftAmount = std::min( + MaxShiftAmount, + std::max(LHS.countMaxLeadingZeros(), LHS.countMaxLeadingOnes()) - 1); + + // Fast path for common case where the shift amount is unknown. + if (MinShiftAmount == 0 && MaxShiftAmount == BitWidth - 1 && + isPowerOf2_32(BitWidth)) { + Known.Zero.setLowBits(LHS.countMinTrailingZeros()); + if (LHS.isAllOnes()) + Known.One.setSignBit(); + if (NSW) { + if (LHS.isNonNegative()) + Known.makeNonNegative(); + if (LHS.isNegative()) + Known.makeNegative(); + } + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. - APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift; - SpecificShift.Zero = LHS.Zero << ShiftAmt; - SpecificShift.One = LHS.One << ShiftAmt; - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. 
+ unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setLowBits(MinTrailingZeros); + // All shift amounts may result in poison. + if (Known.hasConflict()) + Known.setAllZero(); return Known; } -KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS, + bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); - - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero.lshrInPlace(Shift); - Known.One.lshrInPlace(Shift); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known = LHS; + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); // High bits are known zero. - Known.Zero.setHighBits(Shift); + Known.Zero.setHighBits(ShiftAmt); return Known; - } - - // No matter the shift amount, the leading zeros will stay zero. - unsigned MinLeadingZeros = LHS.countMinLeadingZeros(); + }; - // Minimum shift amount high bits are known zero. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - MinLeadingZeros += MinShiftAmount.getZExtValue(); - MinLeadingZeros = std::min(MinLeadingZeros, BitWidth); + // Fast path for a common case when LHS is completely unknown. 
+ KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + Known.Zero.setHighBits(MinShiftAmount); + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. - APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift = LHS; - SpecificShift.Zero.lshrInPlace(ShiftAmt); - SpecificShift.One.lshrInPlace(ShiftAmt); - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. + APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setHighBits(MinLeadingZeros); + // All shift amounts may result in poison. 
+ if (Known.hasConflict()) + Known.setAllZero(); return Known; } -KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS, + bool ShAmtNonZero) { unsigned BitWidth = LHS.getBitWidth(); - KnownBits Known(BitWidth); - - if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) { - unsigned Shift = RHS.getConstant().getZExtValue(); - Known = LHS; - Known.Zero.ashrInPlace(Shift); - Known.One.ashrInPlace(Shift); + auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) { + KnownBits Known = LHS; + Known.Zero.ashrInPlace(ShiftAmt); + Known.One.ashrInPlace(ShiftAmt); return Known; - } - - // No matter the shift amount, the leading sign bits will stay. - unsigned MinLeadingZeros = LHS.countMinLeadingZeros(); - unsigned MinLeadingOnes = LHS.countMinLeadingOnes(); + }; - // Minimum shift amount high bits are known sign bits. - APInt MinShiftAmount = RHS.getMinValue(); - if (MinShiftAmount.ult(BitWidth)) { - if (MinLeadingZeros) { - MinLeadingZeros += MinShiftAmount.getZExtValue(); - MinLeadingZeros = std::min(MinLeadingZeros, BitWidth); - } - if (MinLeadingOnes) { - MinLeadingOnes += MinShiftAmount.getZExtValue(); - MinLeadingOnes = std::min(MinLeadingOnes, BitWidth); + // Fast path for a common case when LHS is completely unknown. + KnownBits Known(BitWidth); + unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth); + if (MinShiftAmount == 0 && ShAmtNonZero) + MinShiftAmount = 1; + if (LHS.isUnknown()) { + if (MinShiftAmount == BitWidth) { + // Always poison. Return zero because we don't like returning conflict. + Known.setAllZero(); + return Known; } + return Known; } - // If the maximum shift is in range, then find the common bits from all - // possible shifts. 
- APInt MaxShiftAmount = RHS.getMaxValue(); - if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) { - uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue(); - uint64_t ShiftAmtOneMask = RHS.One.getZExtValue(); - assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range"); - Known.Zero.setAllBits(); - Known.One.setAllBits(); - for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(), - MaxShiftAmt = MaxShiftAmount.getZExtValue(); - ShiftAmt <= MaxShiftAmt; ++ShiftAmt) { - // Skip if the shift amount is impossible. - if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt || - (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) - continue; - KnownBits SpecificShift = LHS; - SpecificShift.Zero.ashrInPlace(ShiftAmt); - SpecificShift.One.ashrInPlace(ShiftAmt); - Known = KnownBits::commonBits(Known, SpecificShift); - if (Known.isUnknown()) - break; - } + // Find the common bits from all possible shifts. + APInt MaxValue = RHS.getMaxValue(); + unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth); + unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue(); + unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); + for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount; + ++ShiftAmt) { + // Skip if the shift amount is impossible. + if ((ShiftAmtZeroMask & ShiftAmt) != 0 || + (ShiftAmtOneMask | ShiftAmt) != ShiftAmt) + continue; + Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt)); + if (Known.isUnknown()) + break; } - Known.Zero.setHighBits(MinLeadingZeros); - Known.One.setHighBits(MinLeadingOnes); + // All shift amounts may result in poison. + if (Known.hasConflict()) + Known.setAllZero(); return Known; } @@ -399,19 +419,219 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const { // Absolute value preserves trailing zero count. 
KnownBits KnownAbs(getBitWidth()); - KnownAbs.Zero.setLowBits(countMinTrailingZeros()); - // We only know that the absolute values's MSB will be zero if INT_MIN is - // poison, or there is a set bit that isn't the sign bit (otherwise it could - // be INT_MIN). - if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue())) - KnownAbs.Zero.setSignBit(); + // If the input is negative, then abs(x) == -x. + if (isNegative()) { + KnownBits Tmp = *this; + // Special case for IntMinIsPoison. We know the sign bit is set and we know + // all the rest of the bits except one to be zero. Since we have + // IntMinIsPoison, that final bit MUST be a one, as otherwise the input is + // INT_MIN. + if (IntMinIsPoison && (Zero.popcount() + 2) == getBitWidth()) + Tmp.One.setBit(countMinTrailingZeros()); + + KnownAbs = computeForAddSub( + /*Add*/ false, IntMinIsPoison, + KnownBits::makeConstant(APInt(getBitWidth(), 0)), Tmp); + + // One more special case for IntMinIsPoison. If we don't know any ones other + // than the signbit, we know for certain that all the unknowns can't be + // zero. So if we know high zero bits, but have unknown low bits, we know + // for certain those high-zero bits will end up as one. This is because, + // the low bits can't be all zeros, so the +1 in (~x + 1) cannot carry up + // to the high bits. If we know a known INT_MIN input skip this. The result + // is poison anyways. + if (IntMinIsPoison && Tmp.countMinPopulation() == 1 && + Tmp.countMaxPopulation() != 1) { + Tmp.One.clearSignBit(); + Tmp.Zero.setSignBit(); + KnownAbs.One.setBits(getBitWidth() - Tmp.countMinLeadingZeros(), + getBitWidth() - 1); + } + + } else { + unsigned MaxTZ = countMaxTrailingZeros(); + unsigned MinTZ = countMinTrailingZeros(); + + KnownAbs.Zero.setLowBits(MinTZ); + // If we know the lowest set 1, then preserve it. 
+ if (MaxTZ == MinTZ && MaxTZ < getBitWidth()) + KnownAbs.One.setBit(MaxTZ); + + // We only know that the absolute values's MSB will be zero if INT_MIN is + // poison, or there is a set bit that isn't the sign bit (otherwise it could + // be INT_MIN). + if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue())) { + KnownAbs.One.clearSignBit(); + KnownAbs.Zero.setSignBit(); + } + } - // FIXME: Handle known negative input? - // FIXME: Calculate the negated Known bits and combine them? + assert(!KnownAbs.hasConflict() && "Bad Output"); return KnownAbs; } +static KnownBits computeForSatAddSub(bool Add, bool Signed, + const KnownBits &LHS, + const KnownBits &RHS) { + assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs"); + // We don't see NSW even for sadd/ssub as we want to check if the result has + // signed overflow. + KnownBits Res = KnownBits::computeForAddSub(Add, /*NSW*/ false, LHS, RHS); + unsigned BitWidth = Res.getBitWidth(); + auto SignBitKnown = [&](const KnownBits &K) { + return K.Zero[BitWidth - 1] || K.One[BitWidth - 1]; + }; + std::optional<bool> Overflow; + + if (Signed) { + // If we can actually detect overflow do so. Otherwise leave Overflow as + // nullopt (we assume it may have happened). 
+ if (SignBitKnown(LHS) && SignBitKnown(RHS) && SignBitKnown(Res)) { + if (Add) { + // sadd.sat + Overflow = (LHS.isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()); + } else { + // ssub.sat + Overflow = (LHS.isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != LHS.isNonNegative()); + } + } + } else if (Add) { + // uadd.sat + bool Of; + (void)LHS.getMaxValue().uadd_ov(RHS.getMaxValue(), Of); + if (!Of) { + Overflow = false; + } else { + (void)LHS.getMinValue().uadd_ov(RHS.getMinValue(), Of); + if (Of) + Overflow = true; + } + } else { + // usub.sat + bool Of; + (void)LHS.getMinValue().usub_ov(RHS.getMaxValue(), Of); + if (!Of) { + Overflow = false; + } else { + (void)LHS.getMaxValue().usub_ov(RHS.getMinValue(), Of); + if (Of) + Overflow = true; + } + } + + if (Signed) { + if (Add) { + if (LHS.isNonNegative() && RHS.isNonNegative()) { + // Pos + Pos -> Pos + Res.One.clearSignBit(); + Res.Zero.setSignBit(); + } + if (LHS.isNegative() && RHS.isNegative()) { + // Neg + Neg -> Neg + Res.One.setSignBit(); + Res.Zero.clearSignBit(); + } + } else { + if (LHS.isNegative() && RHS.isNonNegative()) { + // Neg - Pos -> Neg + Res.One.setSignBit(); + Res.Zero.clearSignBit(); + } else if (LHS.isNonNegative() && RHS.isNegative()) { + // Pos - Neg -> Pos + Res.One.clearSignBit(); + Res.Zero.setSignBit(); + } + } + } else { + // Add: Leading ones of either operand are preserved. + // Sub: Leading zeros of LHS and leading ones of RHS are preserved + // as leading zeros in the result. + unsigned LeadingKnown; + if (Add) + LeadingKnown = + std::max(LHS.countMinLeadingOnes(), RHS.countMinLeadingOnes()); + else + LeadingKnown = + std::max(LHS.countMinLeadingZeros(), RHS.countMinLeadingOnes()); + + // We select between the operation result and all-ones/zero + // respectively, so we can preserve known ones/zeros. 
+ APInt Mask = APInt::getHighBitsSet(BitWidth, LeadingKnown);
+ if (Add) {
+ Res.One |= Mask;
+ Res.Zero &= ~Mask;
+ } else {
+ Res.Zero |= Mask;
+ Res.One &= ~Mask;
+ }
+ }
+
+ if (Overflow) {
+ // We know whether or not we overflowed.
+ if (!(*Overflow)) {
+ // No overflow.
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We overflowed
+ APInt C;
+ if (Signed) {
+ // sadd.sat / ssub.sat
+ assert(SignBitKnown(LHS) &&
+ "We somehow know overflow without knowing input sign");
+ C = LHS.isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+ } else if (Add) {
+ // uadd.sat
+ C = APInt::getMaxValue(BitWidth);
+ } else {
+ // usub.sat
+ C = APInt::getMinValue(BitWidth);
+ }
+
+ Res.One = C;
+ Res.Zero = ~C;
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We don't know if we overflowed.
+ if (Signed) {
+ // sadd.sat/ssub.sat
+ // We can keep our information about the sign bits.
+ Res.Zero.clearLowBits(BitWidth - 1);
+ Res.One.clearLowBits(BitWidth - 1);
+ } else if (Add) {
+ // uadd.sat
+ // We need to clear all the known zeros as we can only use the leading ones.
+ Res.Zero.clearAllBits();
+ } else {
+ // usub.sat
+ // We need to clear all the known ones as we can only use the leading zeros. 
+ Res.One.clearAllBits(); + } + + assert(!Res.hasConflict() && "Bad Output"); + return Res; +} + +KnownBits KnownBits::sadd_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ true, /*Signed*/ true, LHS, RHS); +} +KnownBits KnownBits::ssub_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ false, /*Signed*/ true, LHS, RHS); +} +KnownBits KnownBits::uadd_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ true, /*Signed*/ false, LHS, RHS); +} +KnownBits KnownBits::usub_sat(const KnownBits &LHS, const KnownBits &RHS) { + return computeForSatAddSub(/*Add*/ false, /*Signed*/ false, LHS, RHS); +} + KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply) { unsigned BitWidth = LHS.getBitWidth(); @@ -432,7 +652,7 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, // fit in the bitwidth (it must not overflow). bool HasOverflow; APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow); - unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countLeadingZeros(); + unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countl_zero(); // The result of the bottom bits of an integer multiply can be // inferred by looking at the bottom bits of both operands and @@ -481,8 +701,8 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, // How many times we'd be able to divide each argument by 2 (shr by 1). // This gives us the number of trailing zeros on the multiplication result. 
- unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes(); - unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes(); + unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countr_one(); + unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countr_one(); unsigned TrailZero0 = LHS.countMinTrailingZeros(); unsigned TrailZero1 = RHS.countMinTrailingZeros(); unsigned TrailZ = TrailZero0 + TrailZero1; @@ -528,34 +748,151 @@ KnownBits KnownBits::mulhu(const KnownBits &LHS, const KnownBits &RHS) { return mul(WideLHS, WideRHS).extractBits(BitWidth, BitWidth); } -KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) { +static KnownBits divComputeLowBit(KnownBits Known, const KnownBits &LHS, + const KnownBits &RHS, bool Exact) { + + if (!Exact) + return Known; + + // If LHS is Odd, the result is Odd no matter what. + // Odd / Odd -> Odd + // Odd / Even -> Impossible (because its exact division) + if (LHS.One[0]) + Known.One.setBit(0); + + int MinTZ = + (int)LHS.countMinTrailingZeros() - (int)RHS.countMaxTrailingZeros(); + int MaxTZ = + (int)LHS.countMaxTrailingZeros() - (int)RHS.countMinTrailingZeros(); + if (MinTZ >= 0) { + // Result has at least MinTZ trailing zeros. + Known.Zero.setLowBits(MinTZ); + if (MinTZ == MaxTZ) { + // Result has exactly MinTZ trailing zeros. + Known.One.setBit(MinTZ); + } + } else if (MaxTZ < 0) { + // Poison Result + Known.setAllZero(); + } + + // In the KnownBits exhaustive tests, we have poison inputs for exact values + // a LOT. If we have a conflict, just return all zeros. + if (Known.hasConflict()) + Known.setAllZero(); + + return Known; +} + +KnownBits KnownBits::sdiv(const KnownBits &LHS, const KnownBits &RHS, + bool Exact) { + // Equivalent of `udiv`. We must have caught this before it was folded. 
+ if (LHS.isNonNegative() && RHS.isNonNegative()) + return udiv(LHS, RHS, Exact); + + unsigned BitWidth = LHS.getBitWidth(); + assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs"); + KnownBits Known(BitWidth); + + if (LHS.isZero() || RHS.isZero()) { + // Result is either known Zero or UB. Return Zero either way. + // Checking this earlier saves us a lot of special cases later on. + Known.setAllZero(); + return Known; + } + + std::optional<APInt> Res; + if (LHS.isNegative() && RHS.isNegative()) { + // Result non-negative. + APInt Denom = RHS.getSignedMaxValue(); + APInt Num = LHS.getSignedMinValue(); + // INT_MIN/-1 would be a poison result (impossible). Estimate the division + // as signed max (we will only set sign bit in the result). + Res = (Num.isMinSignedValue() && Denom.isAllOnes()) + ? APInt::getSignedMaxValue(BitWidth) + : Num.sdiv(Denom); + } else if (LHS.isNegative() && RHS.isNonNegative()) { + // Result is negative if Exact OR -LHS u>= RHS. + if (Exact || (-LHS.getSignedMaxValue()).uge(RHS.getSignedMaxValue())) { + APInt Denom = RHS.getSignedMinValue(); + APInt Num = LHS.getSignedMinValue(); + Res = Denom.isZero() ? Num : Num.sdiv(Denom); + } + } else if (LHS.isStrictlyPositive() && RHS.isNegative()) { + // Result is negative if Exact OR LHS u>= -RHS. 
+ if (Exact || LHS.getSignedMinValue().uge(-RHS.getSignedMinValue())) { + APInt Denom = RHS.getSignedMaxValue(); + APInt Num = LHS.getSignedMaxValue(); + Res = Num.sdiv(Denom); + } + } + + if (Res) { + if (Res->isNonNegative()) { + unsigned LeadZ = Res->countLeadingZeros(); + Known.Zero.setHighBits(LeadZ); + } else { + unsigned LeadO = Res->countLeadingOnes(); + Known.One.setHighBits(LeadO); + } + } + + Known = divComputeLowBit(Known, LHS, RHS, Exact); + + assert(!Known.hasConflict() && "Bad Output"); + return Known; +} + +KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS, + bool Exact) { unsigned BitWidth = LHS.getBitWidth(); assert(!LHS.hasConflict() && !RHS.hasConflict()); KnownBits Known(BitWidth); - // For the purposes of computing leading zeros we can conservatively - // treat a udiv as a logical right shift by the power of 2 known to - // be less than the denominator. - unsigned LeadZ = LHS.countMinLeadingZeros(); - unsigned RHSMaxLeadingZeros = RHS.countMaxLeadingZeros(); + if (LHS.isZero() || RHS.isZero()) { + // Result is either known Zero or UB. Return Zero either way. + // Checking this earlier saves us a lot of special cases later on. + Known.setAllZero(); + return Known; + } - if (RHSMaxLeadingZeros != BitWidth) - LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); + // We can figure out the minimum number of upper zero bits by doing + // MaxNumerator / MinDenominator. If the Numerator gets smaller or Denominator + // gets larger, the number of upper zero bits increases. + APInt MinDenom = RHS.getMinValue(); + APInt MaxNum = LHS.getMaxValue(); + APInt MaxRes = MinDenom.isZero() ? 
MaxNum : MaxNum.udiv(MinDenom); + + unsigned LeadZ = MaxRes.countLeadingZeros(); Known.Zero.setHighBits(LeadZ); + Known = divComputeLowBit(Known, LHS, RHS, Exact); + + assert(!Known.hasConflict() && "Bad Output"); return Known; } -KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { +KnownBits KnownBits::remGetLowBits(const KnownBits &LHS, const KnownBits &RHS) { unsigned BitWidth = LHS.getBitWidth(); + if (!RHS.isZero() && RHS.Zero[0]) { + // rem X, Y where Y[0:N] is zero will preserve X[0:N] in the result. + unsigned RHSZeros = RHS.countMinTrailingZeros(); + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSZeros); + APInt OnesMask = LHS.One & Mask; + APInt ZerosMask = LHS.Zero & Mask; + return KnownBits(ZerosMask, OnesMask); + } + return KnownBits(BitWidth); +} + +KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The upper bits are all zero, the lower ones are unchanged. - APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero | ~LowBits; - Known.One = LHS.One & LowBits; + // NB: Low bits set in `remGetLowBits`. + APInt HighBits = ~(RHS.getConstant() - 1); + Known.Zero |= HighBits; return Known; } @@ -568,16 +905,12 @@ KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) { } KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) { - unsigned BitWidth = LHS.getBitWidth(); assert(!LHS.hasConflict() && !RHS.hasConflict()); - KnownBits Known(BitWidth); + KnownBits Known = remGetLowBits(LHS, RHS); if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) { - // The low bits of the first operand are unchanged by the srem. + // NB: Low bits are set in `remGetLowBits`. 
APInt LowBits = RHS.getConstant() - 1; - Known.Zero = LHS.Zero & LowBits; - Known.One = LHS.One & LowBits; - // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero)) @@ -623,8 +956,40 @@ KnownBits &KnownBits::operator^=(const KnownBits &RHS) { return *this; } +KnownBits KnownBits::blsi() const { + unsigned BitWidth = getBitWidth(); + KnownBits Known(Zero, APInt(BitWidth, 0)); + unsigned Max = countMaxTrailingZeros(); + Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth)); + unsigned Min = countMinTrailingZeros(); + if (Max == Min && Max < BitWidth) + Known.One.setBit(Max); + return Known; +} + +KnownBits KnownBits::blsmsk() const { + unsigned BitWidth = getBitWidth(); + KnownBits Known(BitWidth); + unsigned Max = countMaxTrailingZeros(); + Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth)); + unsigned Min = countMinTrailingZeros(); + Known.One.setLowBits(std::min(Min + 1, BitWidth)); + return Known; +} + void KnownBits::print(raw_ostream &OS) const { - OS << "{Zero=" << Zero << ", One=" << One << "}"; + unsigned BitWidth = getBitWidth(); + for (unsigned I = 0; I < BitWidth; ++I) { + unsigned N = BitWidth - I - 1; + if (Zero[N] && One[N]) + OS << "!"; + else if (Zero[N]) + OS << "0"; + else if (One[N]) + OS << "1"; + else + OS << "?"; + } } void KnownBits::dump() const { print(dbgs()); diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp deleted file mode 100644 index 0282cd9bd79e..000000000000 --- a/llvm/lib/Support/LowLevelType.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- llvm/Support/LowLevelType.cpp -------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file This file implements the more header-heavy bits of the LLT class to -/// avoid polluting users' namespaces. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -LLT::LLT(MVT VT) { - if (VT.isVector()) { - bool asVector = VT.getVectorMinNumElements() > 1; - init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector, - VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(), - /*AddressSpace=*/0); - } else if (VT.isValid()) { - // Aggregates are no different from real scalars as far as GlobalISel is - // concerned. - init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true, - ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0); - } else { - IsScalar = false; - IsPointer = false; - IsVector = false; - RawData = 0; - } -} - -void LLT::print(raw_ostream &OS) const { - if (isVector()) { - OS << "<"; - OS << getElementCount() << " x " << getElementType() << ">"; - } else if (isPointer()) - OS << "p" << getAddressSpace(); - else if (isValid()) { - assert(isScalar() && "unexpected type"); - OS << "s" << getScalarSizeInBits(); - } else - OS << "LLT_invalid"; -} - -const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo; -const constexpr LLT::BitFieldInfo 
LLT::PointerVectorSizeFieldInfo; -const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo; diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 0bb11725d2fc..4cc4fe019b75 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/Alignment.h" @@ -22,6 +23,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" +#include <algorithm> #include <cassert> #include <cstring> #include <new> @@ -132,10 +134,13 @@ MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { static ErrorOr<std::unique_ptr<WritableMemoryBuffer>> getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { - auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); + auto Buf = + WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); if (!Buf) return make_error_code(errc::not_enough_memory); - memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); + // Calling memcpy with null src/dst is UB, and an empty StringRef is + // represented with {nullptr, 0}. 
+ llvm::copy(InputData, Buf->getBufferStart()); return std::move(Buf); } diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp index 6e8137c405b8..3b9273e1eaad 100644 --- a/llvm/lib/Support/NativeFormatting.cpp +++ b/llvm/lib/Support/NativeFormatting.cpp @@ -58,10 +58,7 @@ static void write_unsigned_impl(raw_ostream &S, T N, size_t MinDigits, static_assert(std::is_unsigned_v<T>, "Value is not unsigned!"); char NumberBuffer[128]; - std::memset(NumberBuffer, '0', sizeof(NumberBuffer)); - - size_t Len = 0; - Len = format_to_buffer(N, NumberBuffer); + size_t Len = format_to_buffer(N, NumberBuffer); if (IsNegative) S << '-'; diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp new file mode 100644 index 000000000000..04d50cc70d91 --- /dev/null +++ b/llvm/lib/Support/PGOOptions.cpp @@ -0,0 +1,58 @@ +//===------ PGOOptions.cpp -- PGO option tunables --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PGOOptions.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace llvm; + +PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile, + std::string ProfileRemappingFile, + std::string MemoryProfile, + IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action, + CSPGOAction CSAction, bool DebugInfoForProfiling, + bool PseudoProbeForProfiling) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile), + Action(Action), CSAction(CSAction), + DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) { + // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can + // callback with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // Cannot optimize with MemProf profile during IR instrumentation. + assert(this->MemoryProfile.empty() || this->Action != PGOOptions::IRInstr); + + // If neither Action nor CSAction nor MemoryProfile are set, + // DebugInfoForProfiling or PseudoProbeForProfiling needs to be true. 
+ assert(this->Action != NoAction || this->CSAction != NoCSAction || + !this->MemoryProfile.empty() || this->DebugInfoForProfiling || + this->PseudoProbeForProfiling); + + // If we need to use the profile, the VFS cannot be nullptr. + assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse || + !this->MemoryProfile.empty())); +} + +PGOOptions::PGOOptions(const PGOOptions &) = default; + +PGOOptions &PGOOptions::operator=(const PGOOptions &O) = default; + +PGOOptions::~PGOOptions() = default; diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp index 23ed9d813548..9b14b05b5211 100644 --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -12,8 +12,8 @@ #include "llvm/Support/Threading.h" #include <atomic> +#include <deque> #include <future> -#include <stack> #include <thread> #include <vector> @@ -24,11 +24,11 @@ namespace parallel { #if LLVM_ENABLE_THREADS #ifdef _WIN32 -static thread_local unsigned threadIndex; +static thread_local unsigned threadIndex = UINT_MAX; -unsigned getThreadIndex() { return threadIndex; } +unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; } #else -thread_local unsigned threadIndex; +thread_local unsigned threadIndex = UINT_MAX; #endif namespace detail { @@ -39,7 +39,8 @@ namespace { class Executor { public: virtual ~Executor() = default; - virtual void add(std::function<void()> func) = 0; + virtual void add(std::function<void()> func, bool Sequential = false) = 0; + virtual size_t getThreadCount() const = 0; static Executor *getDefaultExecutor(); }; @@ -49,13 +50,16 @@ public: class ThreadPoolExecutor : public Executor { public: explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) { - unsigned ThreadCount = S.compute_thread_count(); + ThreadCount = S.compute_thread_count(); // Spawn all but one of the threads in another thread as spawning threads // can take a while. 
Threads.reserve(ThreadCount); Threads.resize(1); std::lock_guard<std::mutex> Lock(Mutex); - Threads[0] = std::thread([this, ThreadCount, S] { + // Use operator[] before creating the thread to avoid data race in .size() + // in “safe libc++” mode. + auto &Thread0 = Threads[0]; + Thread0 = std::thread([this, S] { for (unsigned I = 1; I < ThreadCount; ++I) { Threads.emplace_back([=] { work(S, I); }); if (Stop) @@ -94,36 +98,61 @@ public: static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); } }; - void add(std::function<void()> F) override { + void add(std::function<void()> F, bool Sequential = false) override { { std::lock_guard<std::mutex> Lock(Mutex); - WorkStack.push(std::move(F)); + if (Sequential) + WorkQueueSequential.emplace_front(std::move(F)); + else + WorkQueue.emplace_back(std::move(F)); } Cond.notify_one(); } + size_t getThreadCount() const override { return ThreadCount; } + private: + bool hasSequentialTasks() const { + return !WorkQueueSequential.empty() && !SequentialQueueIsLocked; + } + + bool hasGeneralTasks() const { return !WorkQueue.empty(); } + void work(ThreadPoolStrategy S, unsigned ThreadID) { threadIndex = ThreadID; S.apply_thread_strategy(ThreadID); while (true) { std::unique_lock<std::mutex> Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + Cond.wait(Lock, [&] { + return Stop || hasGeneralTasks() || hasSequentialTasks(); + }); if (Stop) break; - auto Task = std::move(WorkStack.top()); - WorkStack.pop(); + bool Sequential = hasSequentialTasks(); + if (Sequential) + SequentialQueueIsLocked = true; + else + assert(hasGeneralTasks()); + + auto &Queue = Sequential ? 
WorkQueueSequential : WorkQueue; + auto Task = std::move(Queue.back()); + Queue.pop_back(); Lock.unlock(); Task(); + if (Sequential) + SequentialQueueIsLocked = false; } } std::atomic<bool> Stop{false}; - std::stack<std::function<void()>> WorkStack; + std::atomic<bool> SequentialQueueIsLocked{false}; + std::deque<std::function<void()>> WorkQueue; + std::deque<std::function<void()>> WorkQueueSequential; std::mutex Mutex; std::condition_variable Cond; std::promise<void> ThreadsCreated; std::vector<std::thread> Threads; + unsigned ThreadCount; }; Executor *Executor::getDefaultExecutor() { @@ -153,54 +182,53 @@ Executor *Executor::getDefaultExecutor() { } } // namespace } // namespace detail -#endif -static std::atomic<int> TaskGroupInstances; +size_t getThreadCount() { + return detail::Executor::getDefaultExecutor()->getThreadCount(); +} +#endif // Latch::sync() called by the dtor may cause one thread to block. If is a dead // lock if all threads in the default executor are blocked. To prevent the dead -// lock, only allow the first TaskGroup to run tasks parallelly. In the scenario +// lock, only allow the root TaskGroup to run tasks parallelly. In the scenario // of nested parallel_for_each(), only the outermost one runs parallelly. -TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {} +TaskGroup::TaskGroup() +#if LLVM_ENABLE_THREADS + : Parallel((parallel::strategy.ThreadsRequested != 1) && + (threadIndex == UINT_MAX)) {} +#else + : Parallel(false) {} +#endif TaskGroup::~TaskGroup() { // We must ensure that all the workloads have finished before decrementing the // instances count. 
L.sync(); - --TaskGroupInstances; } -void TaskGroup::spawn(std::function<void()> F) { +void TaskGroup::spawn(std::function<void()> F, bool Sequential) { #if LLVM_ENABLE_THREADS if (Parallel) { L.inc(); - detail::Executor::getDefaultExecutor()->add([&, F = std::move(F)] { - F(); - L.dec(); - }); + detail::Executor::getDefaultExecutor()->add( + [&, F = std::move(F)] { + F(); + L.dec(); + }, + Sequential); return; } #endif F(); } -void TaskGroup::execute(std::function<void()> F) { - if (parallel::strategy.ThreadsRequested == 1) - F(); - else - spawn(F); -} } // namespace parallel } // namespace llvm void llvm::parallelFor(size_t Begin, size_t End, llvm::function_ref<void(size_t)> Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. #if LLVM_ENABLE_THREADS - auto NumItems = End - Begin; - if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) { + if (parallel::strategy.ThreadsRequested != 1) { + auto NumItems = End - Begin; // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling // overhead on large inputs. 
auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup; @@ -214,8 +242,12 @@ void llvm::parallelFor(size_t Begin, size_t End, Fn(I); }); } - for (; Begin != End; ++Begin) - Fn(Begin); + if (Begin != End) { + TG.spawn([=, &Fn] { + for (size_t I = Begin; I != End; ++I) + Fn(I); + }); + } return; } #endif diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 152d902f52e6..7a57c104ef10 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Endian.h" @@ -22,6 +23,7 @@ #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include <cctype> +#include <cerrno> #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> @@ -1202,18 +1204,10 @@ Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl<char> &Buffer, #include "Windows/Path.inc" #endif -bool IsLLVMDriver = false; - namespace llvm { namespace sys { namespace fs { -std::string getMainExecutable(const char *Argv0, void *MainAddr) { - if (IsLLVMDriver) - return sys::path::stem(Argv0).str(); - return getMainExecutableImpl(Argv0, MainAddr); -} - TempFile::TempFile(StringRef Name, int FD) : TmpName(std::string(Name)), FD(FD) {} TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); } diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp index fa91405fee10..f9f1b8a419b8 100644 --- a/llvm/lib/Support/PrettyStackTrace.cpp +++ b/llvm/lib/Support/PrettyStackTrace.cpp @@ -64,8 +64,7 @@ static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr; // the current thread". If the user happens to overflow an 'unsigned' with // SIGINFO requests, it's possible that some threads will stop responding to it, // but the program won't crash. 
-static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = - ATOMIC_VAR_INIT(1); +static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = 1; static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0; namespace llvm { diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 1b1bff023d2f..70fab8010831 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -1,4 +1,4 @@ -//===-- RISCVISAInfo.cpp - RISCV Arch String Parser -------------*- C++ -*-===// +//===-- RISCVISAInfo.cpp - RISC-V Arch String Parser ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -16,6 +16,7 @@ #include "llvm/Support/raw_ostream.h" #include <array> +#include <atomic> #include <optional> #include <string> #include <vector> @@ -33,107 +34,189 @@ struct RISCVSupportedExtension { const char *Name; /// Supported version. RISCVExtensionVersion Version; + + bool operator<(const RISCVSupportedExtension &RHS) const { + return StringRef(Name) < StringRef(RHS.Name); + } }; } // end anonymous namespace static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh"; +static const char *RISCVGImplications[] = { + "i", "m", "a", "f", "d", "zicsr", "zifencei" +}; + +// NOTE: This table should be sorted alphabetically by extension name. 
static const RISCVSupportedExtension SupportedExtensions[] = { - {"i", RISCVExtensionVersion{2, 0}}, - {"e", RISCVExtensionVersion{1, 9}}, - {"m", RISCVExtensionVersion{2, 0}}, - {"a", RISCVExtensionVersion{2, 0}}, - {"f", RISCVExtensionVersion{2, 0}}, - {"d", RISCVExtensionVersion{2, 0}}, + {"a", RISCVExtensionVersion{2, 1}}, {"c", RISCVExtensionVersion{2, 0}}, - + {"d", RISCVExtensionVersion{2, 2}}, + {"e", RISCVExtensionVersion{2, 0}}, + {"f", RISCVExtensionVersion{2, 2}}, {"h", RISCVExtensionVersion{1, 0}}, + {"i", RISCVExtensionVersion{2, 1}}, + {"m", RISCVExtensionVersion{2, 0}}, - {"zihintpause", RISCVExtensionVersion{2, 0}}, + {"svinval", RISCVExtensionVersion{1, 0}}, + {"svnapot", RISCVExtensionVersion{1, 0}}, + {"svpbmt", RISCVExtensionVersion{1, 0}}, - {"zfhmin", RISCVExtensionVersion{1, 0}}, - {"zfh", RISCVExtensionVersion{1, 0}}, + {"v", RISCVExtensionVersion{1, 0}}, - {"zfinx", RISCVExtensionVersion{1, 0}}, - {"zdinx", RISCVExtensionVersion{1, 0}}, - {"zhinxmin", RISCVExtensionVersion{1, 0}}, - {"zhinx", RISCVExtensionVersion{1, 0}}, + // vendor-defined ('X') extensions + {"xcvbitmanip", RISCVExtensionVersion{1, 0}}, + {"xcvmac", RISCVExtensionVersion{1, 0}}, + {"xsfcie", RISCVExtensionVersion{1, 0}}, + {"xsfvcp", RISCVExtensionVersion{1, 0}}, + {"xtheadba", RISCVExtensionVersion{1, 0}}, + {"xtheadbb", RISCVExtensionVersion{1, 0}}, + {"xtheadbs", RISCVExtensionVersion{1, 0}}, + {"xtheadcmo", RISCVExtensionVersion{1, 0}}, + {"xtheadcondmov", RISCVExtensionVersion{1, 0}}, + {"xtheadfmemidx", RISCVExtensionVersion{1, 0}}, + {"xtheadmac", RISCVExtensionVersion{1, 0}}, + {"xtheadmemidx", RISCVExtensionVersion{1, 0}}, + {"xtheadmempair", RISCVExtensionVersion{1, 0}}, + {"xtheadsync", RISCVExtensionVersion{1, 0}}, + {"xtheadvdot", RISCVExtensionVersion{1, 0}}, + {"xventanacondops", RISCVExtensionVersion{1, 0}}, + + {"zawrs", RISCVExtensionVersion{1, 0}}, {"zba", RISCVExtensionVersion{1, 0}}, {"zbb", RISCVExtensionVersion{1, 0}}, {"zbc", 
RISCVExtensionVersion{1, 0}}, - {"zbs", RISCVExtensionVersion{1, 0}}, - {"zbkb", RISCVExtensionVersion{1, 0}}, {"zbkc", RISCVExtensionVersion{1, 0}}, {"zbkx", RISCVExtensionVersion{1, 0}}, + {"zbs", RISCVExtensionVersion{1, 0}}, + + {"zca", RISCVExtensionVersion{1, 0}}, + {"zcb", RISCVExtensionVersion{1, 0}}, + {"zcd", RISCVExtensionVersion{1, 0}}, + {"zce", RISCVExtensionVersion{1, 0}}, + {"zcf", RISCVExtensionVersion{1, 0}}, + {"zcmp", RISCVExtensionVersion{1, 0}}, + {"zcmt", RISCVExtensionVersion{1, 0}}, + + {"zdinx", RISCVExtensionVersion{1, 0}}, + + {"zfh", RISCVExtensionVersion{1, 0}}, + {"zfhmin", RISCVExtensionVersion{1, 0}}, + {"zfinx", RISCVExtensionVersion{1, 0}}, + + {"zhinx", RISCVExtensionVersion{1, 0}}, + {"zhinxmin", RISCVExtensionVersion{1, 0}}, + + {"zicbom", RISCVExtensionVersion{1, 0}}, + {"zicbop", RISCVExtensionVersion{1, 0}}, + {"zicboz", RISCVExtensionVersion{1, 0}}, + {"zicntr", RISCVExtensionVersion{1, 0}}, + {"zicsr", RISCVExtensionVersion{2, 0}}, + {"zifencei", RISCVExtensionVersion{2, 0}}, + {"zihintpause", RISCVExtensionVersion{2, 0}}, + {"zihpm", RISCVExtensionVersion{1, 0}}, + + {"zk", RISCVExtensionVersion{1, 0}}, + {"zkn", RISCVExtensionVersion{1, 0}}, {"zknd", RISCVExtensionVersion{1, 0}}, {"zkne", RISCVExtensionVersion{1, 0}}, {"zknh", RISCVExtensionVersion{1, 0}}, - {"zksed", RISCVExtensionVersion{1, 0}}, - {"zksh", RISCVExtensionVersion{1, 0}}, {"zkr", RISCVExtensionVersion{1, 0}}, - {"zkn", RISCVExtensionVersion{1, 0}}, {"zks", RISCVExtensionVersion{1, 0}}, + {"zksed", RISCVExtensionVersion{1, 0}}, + {"zksh", RISCVExtensionVersion{1, 0}}, {"zkt", RISCVExtensionVersion{1, 0}}, - {"zk", RISCVExtensionVersion{1, 0}}, {"zmmul", RISCVExtensionVersion{1, 0}}, - {"v", RISCVExtensionVersion{1, 0}}, - {"zvl32b", RISCVExtensionVersion{1, 0}}, - {"zvl64b", RISCVExtensionVersion{1, 0}}, - {"zvl128b", RISCVExtensionVersion{1, 0}}, - {"zvl256b", RISCVExtensionVersion{1, 0}}, - {"zvl512b", RISCVExtensionVersion{1, 0}}, - {"zvl1024b", 
RISCVExtensionVersion{1, 0}}, - {"zvl2048b", RISCVExtensionVersion{1, 0}}, - {"zvl4096b", RISCVExtensionVersion{1, 0}}, - {"zvl8192b", RISCVExtensionVersion{1, 0}}, - {"zvl16384b", RISCVExtensionVersion{1, 0}}, - {"zvl32768b", RISCVExtensionVersion{1, 0}}, - {"zvl65536b", RISCVExtensionVersion{1, 0}}, - {"zve32x", RISCVExtensionVersion{1, 0}}, {"zve32f", RISCVExtensionVersion{1, 0}}, - {"zve64x", RISCVExtensionVersion{1, 0}}, - {"zve64f", RISCVExtensionVersion{1, 0}}, + {"zve32x", RISCVExtensionVersion{1, 0}}, {"zve64d", RISCVExtensionVersion{1, 0}}, + {"zve64f", RISCVExtensionVersion{1, 0}}, + {"zve64x", RISCVExtensionVersion{1, 0}}, - {"zicbom", RISCVExtensionVersion{1, 0}}, - {"zicboz", RISCVExtensionVersion{1, 0}}, - {"zicbop", RISCVExtensionVersion{1, 0}}, + {"zvfh", RISCVExtensionVersion{1, 0}}, - {"svnapot", RISCVExtensionVersion{1, 0}}, - {"svpbmt", RISCVExtensionVersion{1, 0}}, - {"svinval", RISCVExtensionVersion{1, 0}}, - {"xventanacondops", RISCVExtensionVersion{1, 0}}, - {"xtheadvdot", RISCVExtensionVersion{1, 0}}, + {"zvl1024b", RISCVExtensionVersion{1, 0}}, + {"zvl128b", RISCVExtensionVersion{1, 0}}, + {"zvl16384b", RISCVExtensionVersion{1, 0}}, + {"zvl2048b", RISCVExtensionVersion{1, 0}}, + {"zvl256b", RISCVExtensionVersion{1, 0}}, + {"zvl32768b", RISCVExtensionVersion{1, 0}}, + {"zvl32b", RISCVExtensionVersion{1, 0}}, + {"zvl4096b", RISCVExtensionVersion{1, 0}}, + {"zvl512b", RISCVExtensionVersion{1, 0}}, + {"zvl64b", RISCVExtensionVersion{1, 0}}, + {"zvl65536b", RISCVExtensionVersion{1, 0}}, + {"zvl8192b", RISCVExtensionVersion{1, 0}}, }; +// NOTE: This table should be sorted alphabetically by extension name. 
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { + {"smaia", RISCVExtensionVersion{1, 0}}, + {"ssaia", RISCVExtensionVersion{1, 0}}, + + {"zacas", RISCVExtensionVersion{1, 0}}, + + {"zfa", RISCVExtensionVersion{0, 2}}, + {"zfbfmin", RISCVExtensionVersion{0, 6}}, + + {"zicond", RISCVExtensionVersion{1, 0}}, + {"zihintntl", RISCVExtensionVersion{0, 2}}, - {"zca", RISCVExtensionVersion{0, 70}}, - {"zcd", RISCVExtensionVersion{0, 70}}, - {"zcf", RISCVExtensionVersion{0, 70}}, - {"zvfh", RISCVExtensionVersion{0, 1}}, - {"zawrs", RISCVExtensionVersion{1, 0}}, {"ztso", RISCVExtensionVersion{0, 1}}, + + {"zvbb", RISCVExtensionVersion{1, 0}}, + {"zvbc", RISCVExtensionVersion{1, 0}}, + + {"zvfbfmin", RISCVExtensionVersion{0, 6}}, + {"zvfbfwma", RISCVExtensionVersion{0, 6}}, + + // vector crypto + {"zvkg", RISCVExtensionVersion{1, 0}}, + {"zvkn", RISCVExtensionVersion{1, 0}}, + {"zvknc", RISCVExtensionVersion{1, 0}}, + {"zvkned", RISCVExtensionVersion{1, 0}}, + {"zvkng", RISCVExtensionVersion{1, 0}}, + {"zvknha", RISCVExtensionVersion{1, 0}}, + {"zvknhb", RISCVExtensionVersion{1, 0}}, + {"zvks", RISCVExtensionVersion{1, 0}}, + {"zvksc", RISCVExtensionVersion{1, 0}}, + {"zvksed", RISCVExtensionVersion{1, 0}}, + {"zvksg", RISCVExtensionVersion{1, 0}}, + {"zvksh", RISCVExtensionVersion{1, 0}}, + {"zvkt", RISCVExtensionVersion{1, 0}}, }; +static void verifyTables() { +#ifndef NDEBUG + static std::atomic<bool> TableChecked(false); + if (!TableChecked.load(std::memory_order_relaxed)) { + assert(llvm::is_sorted(SupportedExtensions) && + "Extensions are not sorted by name"); + assert(llvm::is_sorted(SupportedExperimentalExtensions) && + "Experimental extensions are not sorted by name"); + TableChecked.store(true, std::memory_order_relaxed); + } +#endif +} + static bool stripExperimentalPrefix(StringRef &Ext) { return Ext.consume_front("experimental-"); } -// This function finds the first character that doesn't belong to a version +// This function finds the 
last character that doesn't belong to a version // (e.g. zba1p0 is extension 'zba' of version '1p0'). So the function will // consume [0-9]*p[0-9]* starting from the backward. An extension name will not // end with a digit or the letter 'p', so this function will parse correctly. // NOTE: This function is NOT able to take empty strings or strings that only // have version numbers and no extension name. It assumes the extension name // will be at least more than one character. -static size_t findFirstNonVersionCharacter(StringRef Ext) { +static size_t findLastNonVersionCharacter(StringRef Ext) { assert(!Ext.empty() && "Already guarded by if-statement in ::parseArchString"); @@ -149,11 +232,12 @@ static size_t findFirstNonVersionCharacter(StringRef Ext) { } namespace { -struct FindByName { - FindByName(StringRef Ext) : Ext(Ext){}; - StringRef Ext; - bool operator()(const RISCVSupportedExtension &ExtInfo) { - return ExtInfo.Name == Ext; +struct LessExtName { + bool operator()(const RISCVSupportedExtension &LHS, StringRef RHS) { + return StringRef(LHS.Name) < RHS; + } + bool operator()(StringRef LHS, const RISCVSupportedExtension &RHS) { + return LHS < StringRef(RHS.Name); } }; } // namespace @@ -164,12 +248,12 @@ findDefaultVersion(StringRef ExtName) { // TODO: We might set default version based on profile or ISA spec. 
for (auto &ExtInfo : {ArrayRef(SupportedExtensions), ArrayRef(SupportedExperimentalExtensions)}) { - auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName)); + auto I = llvm::lower_bound(ExtInfo, ExtName, LessExtName()); - if (ExtensionInfoIterator == ExtInfo.end()) { + if (I == ExtInfo.end() || I->Name != ExtName) continue; - } - return ExtensionInfoIterator->Version; + + return I->Version; } return std::nullopt; } @@ -177,15 +261,12 @@ findDefaultVersion(StringRef ExtName) { void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion, unsigned MinorVersion) { RISCVExtensionInfo Ext; - Ext.ExtName = ExtName.str(); Ext.MajorVersion = MajorVersion; Ext.MinorVersion = MinorVersion; Exts[ExtName.str()] = Ext; } static StringRef getExtensionTypeDesc(StringRef Ext) { - if (Ext.startswith("sx")) - return "non-standard supervisor-level extension"; if (Ext.startswith("s")) return "standard supervisor-level extension"; if (Ext.startswith("x")) @@ -196,8 +277,6 @@ static StringRef getExtensionTypeDesc(StringRef Ext) { } static StringRef getExtensionType(StringRef Ext) { - if (Ext.startswith("sx")) - return "sx"; if (Ext.startswith("s")) return "s"; if (Ext.startswith("x")) @@ -209,36 +288,50 @@ static StringRef getExtensionType(StringRef Ext) { static std::optional<RISCVExtensionVersion> isExperimentalExtension(StringRef Ext) { - auto ExtIterator = - llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext)); - if (ExtIterator == std::end(SupportedExperimentalExtensions)) + auto I = + llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName()); + if (I == std::end(SupportedExperimentalExtensions) || I->Name != Ext) return std::nullopt; - return ExtIterator->Version; + return I->Version; } bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) { bool IsExperimental = stripExperimentalPrefix(Ext); - if (IsExperimental) - return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); - else - return 
llvm::any_of(SupportedExtensions, FindByName(Ext)); + ArrayRef<RISCVSupportedExtension> ExtInfo = + IsExperimental ? ArrayRef(SupportedExperimentalExtensions) + : ArrayRef(SupportedExtensions); + + auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); + return I != ExtInfo.end() && I->Name == Ext; } bool RISCVISAInfo::isSupportedExtension(StringRef Ext) { - return llvm::any_of(SupportedExtensions, FindByName(Ext)) || - llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext)); + verifyTables(); + + for (auto ExtInfo : {ArrayRef(SupportedExtensions), + ArrayRef(SupportedExperimentalExtensions)}) { + auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName()); + if (I != ExtInfo.end() && I->Name == Ext) + return true; + } + + return false; } bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion, unsigned MinorVersion) { - auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) { - return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) && - (MinorVersion == ExtInfo.Version.Minor); - }; - return llvm::any_of(SupportedExtensions, FindByNameAndVersion) || - llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion); + for (auto ExtInfo : {ArrayRef(SupportedExtensions), + ArrayRef(SupportedExperimentalExtensions)}) { + auto Range = + std::equal_range(ExtInfo.begin(), ExtInfo.end(), Ext, LessExtName()); + for (auto I = Range.first, E = Range.second; I != E; ++I) + if (I->Version.Major == MajorVersion && I->Version.Minor == MinorVersion) + return true; + } + + return false; } bool RISCVISAInfo::hasExtension(StringRef Ext) const { @@ -250,78 +343,71 @@ bool RISCVISAInfo::hasExtension(StringRef Ext) const { return Exts.count(Ext.str()) != 0; } +// We rank extensions in the following order: +// -Single letter extensions in canonical order. +// -Unknown single letter extensions in alphabetical order. 
+// -Multi-letter extensions starting with 'z' sorted by canonical order of +// the second letter then sorted alphabetically. +// -Multi-letter extensions starting with 's' in alphabetical order. +// -(TODO) Multi-letter extensions starting with 'zxm' in alphabetical order. +// -X extensions in alphabetical order. +// These flags are used to indicate the category. The first 6 bits store the +// single letter extension rank for single letter and multi-letter extensions +// starting with 'z'. +enum RankFlags { + RF_Z_EXTENSION = 1 << 6, + RF_S_EXTENSION = 1 << 7, + RF_X_EXTENSION = 1 << 8, +}; + // Get the rank for single-letter extension, lower value meaning higher // priority. -static int singleLetterExtensionRank(char Ext) { +static unsigned singleLetterExtensionRank(char Ext) { + assert(Ext >= 'a' && Ext <= 'z'); switch (Ext) { case 'i': - return -2; + return 0; case 'e': - return -1; - default: - break; + return 1; } size_t Pos = AllStdExts.find(Ext); - int Rank; - if (Pos == StringRef::npos) - // If we got an unknown extension letter, then give it an alphabetical - // order, but after all known standard extensions. - Rank = AllStdExts.size() + (Ext - 'a'); - else - Rank = Pos; + if (Pos != StringRef::npos) + return Pos + 2; // Skip 'e' and 'i' from above. - return Rank; + // If we got an unknown extension letter, then give it an alphabetical + // order, but after all known standard extensions. + return 2 + AllStdExts.size() + (Ext - 'a'); } // Get the rank for multi-letter extension, lower value meaning higher // priority/order in canonical order. -static int multiLetterExtensionRank(const std::string &ExtName) { - assert(ExtName.length() >= 2); - int HighOrder; - int LowOrder = 0; - // The order between multi-char extensions: s -> h -> z -> x. 
- char ExtClass = ExtName[0]; - switch (ExtClass) { +static unsigned getExtensionRank(const std::string &ExtName) { + assert(ExtName.size() >= 1); + switch (ExtName[0]) { case 's': - HighOrder = 0; - break; + return RF_S_EXTENSION; case 'z': - HighOrder = 1; + assert(ExtName.size() >= 2); // `z` extension must be sorted by canonical order of second letter. // e.g. zmx has higher rank than zax. - LowOrder = singleLetterExtensionRank(ExtName[1]); - break; + return RF_Z_EXTENSION | singleLetterExtensionRank(ExtName[1]); case 'x': - HighOrder = 2; - break; + return RF_X_EXTENSION; default: - llvm_unreachable("Unknown prefix for multi-char extension"); - return -1; + assert(ExtName.size() == 1); + return singleLetterExtensionRank(ExtName[0]); } - - return (HighOrder << 8) + LowOrder; } // Compare function for extension. // Only compare the extension name, ignore version comparison. bool RISCVISAInfo::compareExtension(const std::string &LHS, const std::string &RHS) { - size_t LHSLen = LHS.length(); - size_t RHSLen = RHS.length(); - if (LHSLen == 1 && RHSLen != 1) - return true; + unsigned LHSRank = getExtensionRank(LHS); + unsigned RHSRank = getExtensionRank(RHS); - if (LHSLen != 1 && RHSLen == 1) - return false; - - if (LHSLen == 1 && RHSLen == 1) - return singleLetterExtensionRank(LHS[0]) < - singleLetterExtensionRank(RHS[0]); - - // Both are multi-char ext here. - int LHSRank = multiLetterExtensionRank(LHS); - int RHSRank = multiLetterExtensionRank(RHS); + // If the ranks differ, pick the lower rank. if (LHSRank != RHSRank) return LHSRank < RHSRank; @@ -485,11 +571,12 @@ RISCVISAInfo::parseFeatures(unsigned XLen, ? ArrayRef(SupportedExperimentalExtensions) : ArrayRef(SupportedExtensions); auto ExtensionInfoIterator = - llvm::find_if(ExtensionInfos, FindByName(ExtName)); + llvm::lower_bound(ExtensionInfos, ExtName, LessExtName()); // Not all features is related to ISA extension, like `relax` or // `save-restore`, skip those feature. 
- if (ExtensionInfoIterator == ExtensionInfos.end()) + if (ExtensionInfoIterator == ExtensionInfos.end() || + ExtensionInfoIterator->Name != ExtName) continue; if (Add) @@ -503,6 +590,67 @@ RISCVISAInfo::parseFeatures(unsigned XLen, } llvm::Expected<std::unique_ptr<RISCVISAInfo>> +RISCVISAInfo::parseNormalizedArchString(StringRef Arch) { + if (llvm::any_of(Arch, isupper)) { + return createStringError(errc::invalid_argument, + "string must be lowercase"); + } + // Must start with a valid base ISA name. + unsigned XLen; + if (Arch.startswith("rv32i") || Arch.startswith("rv32e")) + XLen = 32; + else if (Arch.startswith("rv64i") || Arch.startswith("rv64e")) + XLen = 64; + else + return createStringError(errc::invalid_argument, + "arch string must begin with valid base ISA"); + std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen)); + // Discard rv32/rv64 prefix. + Arch = Arch.substr(4); + + // Each extension is of the form ${name}${major_version}p${minor_version} + // and separated by _. Split by _ and then extract the name and version + // information for each extension. + SmallVector<StringRef, 8> Split; + Arch.split(Split, '_'); + for (StringRef Ext : Split) { + StringRef Prefix, MinorVersionStr; + std::tie(Prefix, MinorVersionStr) = Ext.rsplit('p'); + if (MinorVersionStr.empty()) + return createStringError(errc::invalid_argument, + "extension lacks version in expected format"); + unsigned MajorVersion, MinorVersion; + if (MinorVersionStr.getAsInteger(10, MinorVersion)) + return createStringError(errc::invalid_argument, + "failed to parse minor version number"); + + // Split Prefix into the extension name and the major version number + // (the trailing digits of Prefix). 
+ int TrailingDigits = 0; + StringRef ExtName = Prefix; + while (!ExtName.empty()) { + if (!isDigit(ExtName.back())) + break; + ExtName = ExtName.drop_back(1); + TrailingDigits++; + } + if (!TrailingDigits) + return createStringError(errc::invalid_argument, + "extension lacks version in expected format"); + + StringRef MajorVersionStr = Prefix.take_back(TrailingDigits); + if (MajorVersionStr.getAsInteger(10, MajorVersion)) + return createStringError(errc::invalid_argument, + "failed to parse major version number"); + ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion); + } + ISAInfo->updateFLen(); + ISAInfo->updateMinVLen(); + ISAInfo->updateMaxELen(); + return std::move(ISAInfo); +} + +llvm::Expected<std::unique_ptr<RISCVISAInfo>> RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool ExperimentalExtensionVersionCheck, bool IgnoreUnknown) { @@ -515,8 +663,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool HasRV64 = Arch.startswith("rv64"); // ISA string must begin with rv32 or rv64. if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) { - return createStringError(errc::invalid_argument, - "string must begin with rv32{i,e,g} or rv64{i,g}"); + return createStringError( + errc::invalid_argument, + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } unsigned XLen = HasRV64 ? 64 : 32; @@ -532,27 +681,27 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, default: return createStringError(errc::invalid_argument, "first letter should be 'e', 'i' or 'g'"); - case 'e': { - // Extension 'e' is not allowed in rv64. - if (HasRV64) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - break; - } + case 'e': case 'i': break; case 'g': - // g = imafd + // g expands to extensions in RISCVGImplications. 
+ if (Arch.size() > 5 && isDigit(Arch[5])) + return createStringError(errc::invalid_argument, + "version not supported for 'g'"); StdExts = StdExts.drop_front(4); break; } + if (Arch.back() == '_') + return createStringError(errc::invalid_argument, + "extension name missing after separator '_'"); + // Skip rvxxx StringRef Exts = Arch.substr(5); // Remove multi-letter standard extensions, non-standard extensions and - // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes. + // supervisor-level extensions. They have 'z', 'x', 's' prefixes. // Parse them at the end. // Find the very first occurrence of 's', 'x' or 'z'. StringRef OtherExts; @@ -563,36 +712,48 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } unsigned Major, Minor, ConsumeLength; - if (auto E = getExtensionVersion(std::string(1, Baseline), Exts, Major, Minor, - ConsumeLength, EnableExperimentalExtension, - ExperimentalExtensionVersionCheck)) - return std::move(E); - if (Baseline == 'g') { + // Versions for g are disallowed, and this was checked for previously. + ConsumeLength = 0; + // No matter which version is given to `g`, we always set imafd to default // version since the we don't have clear version scheme for that on // ISA spec. - for (const auto *Ext : {"i", "m", "a", "f", "d"}) + for (const auto *Ext : RISCVGImplications) { if (auto Version = findDefaultVersion(Ext)) ISAInfo->addExtension(Ext, Version->Major, Version->Minor); else llvm_unreachable("Default extension version not found?"); - } else + } + } else { // Baseline is `i` or `e` - ISAInfo->addExtension(std::string(1, Baseline), Major, Minor); + if (auto E = getExtensionVersion( + StringRef(&Baseline, 1), Exts, Major, Minor, ConsumeLength, + EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) { + if (!IgnoreUnknown) + return std::move(E); + // If IgnoreUnknown, then ignore an unrecognised version of the baseline + // ISA and just use the default supported version. 
+ consumeError(std::move(E)); + auto Version = findDefaultVersion(StringRef(&Baseline, 1)); + Major = Version->Major; + Minor = Version->Minor; + } + + ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor); + } // Consume the base ISA version number and any '_' between rvxxx and the // first extension Exts = Exts.drop_front(ConsumeLength); Exts.consume_front("_"); - // TODO: Use version number when setting target features - auto StdExtsItr = StdExts.begin(); auto StdExtsEnd = StdExts.end(); - auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength) { + auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength, + StringRef::iterator E) { I += 1 + ConsumeLength; - if (*I == '_') + if (I != E && *I == '_') ++I; }; for (auto I = Exts.begin(), E = Exts.end(); I != E;) { @@ -619,38 +780,37 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // Move to next char to prevent repeated letter. ++StdExtsItr; - std::string Next; + StringRef Next; unsigned Major, Minor, ConsumeLength; if (std::next(I) != E) - Next = std::string(std::next(I), E); - if (auto E = getExtensionVersion(std::string(1, C), Next, Major, Minor, + Next = StringRef(std::next(I), E - std::next(I)); + if (auto E = getExtensionVersion(StringRef(&C, 1), Next, Major, Minor, ConsumeLength, EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) { if (IgnoreUnknown) { consumeError(std::move(E)); - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); continue; } return std::move(E); } // The order is OK, then push it into features. - // TODO: Use version number when setting target features // Currently LLVM supports only "mafdcvh". 
if (!isSupportedExtension(StringRef(&C, 1))) { if (IgnoreUnknown) { - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); continue; } return createStringError(errc::invalid_argument, "unsupported standard user-level extension '%c'", C); } - ISAInfo->addExtension(std::string(1, C), Major, Minor); + ISAInfo->addExtension(StringRef(&C, 1), Major, Minor); // Consume full extension name and version, including any optional '_' // between this extension and the next - GoToNextExt(I, ConsumeLength); + GoToNextExt(I, ConsumeLength, Exts.end()); } // Handle other types of extensions other than the standard @@ -658,9 +818,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // Parse the ISA string containing non-standard user-level // extensions, standard supervisor-level extensions and // non-standard supervisor-level extensions. - // These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a - // canonical order, might have a version number (major, minor) - // and are separated by a single underscore '_'. + // These extensions start with 'z', 's', 'x' prefixes, might have a version + // number (major, minor) and are separated by a single underscore '_'. We do + // not enforce a canonical order for them. // Set the hardware features for the extensions that are supported. 
// Multi-letter extensions are seperated by a single underscore @@ -669,9 +829,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, OtherExts.split(Split, '_'); SmallVector<StringRef, 8> AllExts; - std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"}; - auto I = Prefix.begin(); - auto E = Prefix.end(); if (Split.size() > 1 || Split[0] != "") { for (StringRef Ext : Split) { if (Ext.empty()) @@ -680,7 +837,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, StringRef Type = getExtensionType(Ext); StringRef Desc = getExtensionTypeDesc(Ext); - auto Pos = findFirstNonVersionCharacter(Ext) + 1; + auto Pos = findLastNonVersionCharacter(Ext) + 1; StringRef Name(Ext.substr(0, Pos)); StringRef Vers(Ext.substr(Pos)); @@ -691,18 +848,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, "invalid extension prefix '" + Ext + "'"); } - // Check ISA extensions are specified in the canonical order. - while (I != E && *I != Type) - ++I; - - if (I == E) { - if (IgnoreUnknown) - continue; - return createStringError(errc::invalid_argument, - "%s not given in canonical order '%s'", - Desc.str().c_str(), Ext.str().c_str()); - } - if (!IgnoreUnknown && Name.size() == Type.size()) { return createStringError(errc::invalid_argument, "%s name missing after '%s'", @@ -726,6 +871,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, Desc.str().c_str(), Name.str().c_str()); } + if (IgnoreUnknown && !isSupportedExtension(Name)) + continue; + ISAInfo->addExtension(Name, Major, Minor); // Extension format is correct, keep parsing the extensions. // TODO: Save Type, Name, Major, Minor to avoid parsing them later. 
@@ -745,51 +893,55 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } Error RISCVISAInfo::checkDependency() { - bool IsRv32 = XLen == 32; - bool HasE = Exts.count("e") != 0; - bool HasD = Exts.count("d") != 0; + bool HasC = Exts.count("c") != 0; bool HasF = Exts.count("f") != 0; bool HasZfinx = Exts.count("zfinx") != 0; - bool HasZdinx = Exts.count("zdinx") != 0; bool HasVector = Exts.count("zve32x") != 0; - bool HasZve32f = Exts.count("zve32f") != 0; - bool HasZve64d = Exts.count("zve64d") != 0; bool HasZvl = MinVLen != 0; + bool HasZcmt = Exts.count("zcmt") != 0; - if (HasE && !IsRv32) + if (HasF && HasZfinx) + return createStringError(errc::invalid_argument, + "'f' and 'zfinx' extensions are incompatible"); + + if (HasZvl && !HasVector) return createStringError( errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); + "'zvl*b' requires 'v' or 'zve*' extension to also be specified"); - // It's illegal to specify the 'd' (double-precision floating point) - // extension without also specifying the 'f' (single precision - // floating-point) extension. 
- // TODO: This has been removed in later specs, which specify that D implies F - if (HasD && !HasF) - return createStringError(errc::invalid_argument, - "d requires f extension to also be specified"); + if (Exts.count("zvbb") && !HasVector) + return createStringError( + errc::invalid_argument, + "'zvbb' requires 'v' or 'zve*' extension to also be specified"); - if (HasZve32f && !HasF && !HasZfinx) + if (Exts.count("zvbc") && !Exts.count("zve64x")) return createStringError( errc::invalid_argument, - "zve32f requires f or zfinx extension to also be specified"); + "'zvbc' requires 'v' or 'zve64*' extension to also be specified"); - if (HasZve64d && !HasD && !HasZdinx) + if ((Exts.count("zvkg") || Exts.count("zvkned") || Exts.count("zvknha") || + Exts.count("zvksed") || Exts.count("zvksh")) && + !HasVector) return createStringError( errc::invalid_argument, - "zve64d requires d or zdinx extension to also be specified"); + "'zvk*' requires 'v' or 'zve*' extension to also be specified"); - if (Exts.count("zvfh") && !Exts.count("zfh") && !Exts.count("zfhmin") && - !Exts.count("zhinx") && !Exts.count("zhinxmin")) + if (Exts.count("zvknhb") && !Exts.count("zve64x")) return createStringError( errc::invalid_argument, - "zvfh requires zfh, zfhmin, zhinx or zhinxmin extension to also be " - "specified"); + "'zvknhb' requires 'v' or 'zve64*' extension to also be specified"); - if (HasZvl && !HasVector) + if ((HasZcmt || Exts.count("zcmp")) && Exts.count("d") && + (HasC || Exts.count("zcd"))) return createStringError( errc::invalid_argument, - "zvl*b requires v or zve* extension to also be specified"); + Twine("'") + (HasZcmt ? "zcmt" : "zcmp") + + "' extension is incompatible with '" + (HasC ? "c" : "zcd") + + "' extension when 'd' extension is enabled"); + + if (XLen != 32 && Exts.count("zcf")) + return createStringError(errc::invalid_argument, + "'zcf' is only supported for 'rv32'"); // Additional dependency checks. // TODO: The 'q' extension requires rv64. 
@@ -798,34 +950,58 @@ Error RISCVISAInfo::checkDependency() { return Error::success(); } -static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"}; -static const char *ImpliedExtsZfhmin[] = {"f"}; -static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsD[] = {"f"}; +static const char *ImpliedExtsF[] = {"zicsr"}; +static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"}; +static const char *ImpliedExtsXTHeadVdot[] = {"v"}; +static const char *ImpliedExtsXsfvcp[] = {"zve32x"}; +static const char *ImpliedExtsZacas[] = {"a"}; +static const char *ImpliedExtsZcb[] = {"zca"}; +static const char *ImpliedExtsZcd[] = {"zca"}; +static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"}; +static const char *ImpliedExtsZcf[] = {"zca"}; +static const char *ImpliedExtsZcmp[] = {"zca"}; +static const char *ImpliedExtsZcmt[] = {"zca"}; static const char *ImpliedExtsZdinx[] = {"zfinx"}; -static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZfa[] = {"f"}; +static const char *ImpliedExtsZfbfmin[] = {"f"}; +static const char *ImpliedExtsZfh[] = {"f"}; +static const char *ImpliedExtsZfhmin[] = {"f"}; +static const char *ImpliedExtsZfinx[] = {"zicsr"}; static const char *ImpliedExtsZhinx[] = {"zfinx"}; -static const char *ImpliedExtsZve64d[] = {"zve64f"}; +static const char *ImpliedExtsZhinxmin[] = {"zfinx"}; +static const char *ImpliedExtsZicntr[] = {"zicsr"}; +static const char *ImpliedExtsZihpm[] = {"zicsr"}; +static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; +static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", + "zkne", "zknd", "zknh"}; +static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; +static const char *ImpliedExtsZve32f[] = {"zve32x", "f"}; +static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"}; +static const char *ImpliedExtsZve64d[] = {"zve64f", "d"}; static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"}; static const char *ImpliedExtsZve64x[] = 
{"zve32x", "zvl64b"}; -static const char *ImpliedExtsZve32f[] = {"zve32x"}; -static const char *ImpliedExtsZve32x[] = {"zvl32b"}; -static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; -static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; +static const char *ImpliedExtsZvfbfmin[] = {"zve32f"}; +static const char *ImpliedExtsZvfbfwma[] = {"zve32f"}; +static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"}; +static const char *ImpliedExtsZvkn[] = {"zvbb", "zvkned", "zvknhb", "zvkt"}; +static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"}; +static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"}; +static const char *ImpliedExtsZvknhb[] = {"zvknha"}; +static const char *ImpliedExtsZvks[] = {"zvbb", "zvksed", "zvksh", "zvkt"}; +static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"}; +static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"}; +static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; +static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"}; -static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; -static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; static const char *ImpliedExtsZvl2048b[] = {"zvl1024b"}; -static const char *ImpliedExtsZvl1024b[] = {"zvl512b"}; -static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl256b[] = {"zvl128b"}; -static const char *ImpliedExtsZvl128b[] = {"zvl64b"}; +static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"}; +static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"}; +static const char *ImpliedExtsZvl512b[] = {"zvl256b"}; static const char *ImpliedExtsZvl64b[] = {"zvl32b"}; -static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"}; -static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", - "zkne", "zknd", "zknh"}; -static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"}; -static const char *ImpliedExtsZvfh[] = {"zve32f"}; -static const char *ImpliedExtsXTHeadVdot[] = 
{"v"}; +static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"}; +static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"}; struct ImpliedExtsEntry { StringLiteral Name; @@ -840,13 +1016,28 @@ struct ImpliedExtsEntry { // Note: The table needs to be sorted by name. static constexpr ImpliedExtsEntry ImpliedExts[] = { + {{"d"}, {ImpliedExtsD}}, + {{"f"}, {ImpliedExtsF}}, {{"v"}, {ImpliedExtsV}}, + {{"xsfvcp"}, {ImpliedExtsXsfvcp}}, {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}}, + {{"zacas"}, {ImpliedExtsZacas}}, + {{"zcb"}, {ImpliedExtsZcb}}, + {{"zcd"}, {ImpliedExtsZcd}}, + {{"zce"}, {ImpliedExtsZce}}, + {{"zcf"}, {ImpliedExtsZcf}}, + {{"zcmp"}, {ImpliedExtsZcmp}}, + {{"zcmt"}, {ImpliedExtsZcmt}}, {{"zdinx"}, {ImpliedExtsZdinx}}, + {{"zfa"}, {ImpliedExtsZfa}}, + {{"zfbfmin"}, {ImpliedExtsZfbfmin}}, {{"zfh"}, {ImpliedExtsZfh}}, {{"zfhmin"}, {ImpliedExtsZfhmin}}, + {{"zfinx"}, {ImpliedExtsZfinx}}, {{"zhinx"}, {ImpliedExtsZhinx}}, {{"zhinxmin"}, {ImpliedExtsZhinxmin}}, + {{"zicntr"}, {ImpliedExtsZicntr}}, + {{"zihpm"}, {ImpliedExtsZihpm}}, {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, @@ -855,7 +1046,16 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = { {{"zve64d"}, {ImpliedExtsZve64d}}, {{"zve64f"}, {ImpliedExtsZve64f}}, {{"zve64x"}, {ImpliedExtsZve64x}}, + {{"zvfbfmin"}, {ImpliedExtsZvfbfmin}}, + {{"zvfbfwma"}, {ImpliedExtsZvfbfwma}}, {{"zvfh"}, {ImpliedExtsZvfh}}, + {{"zvkn"}, {ImpliedExtsZvkn}}, + {{"zvknc"}, {ImpliedExtsZvknc}}, + {{"zvkng"}, {ImpliedExtsZvkng}}, + {{"zvknhb"}, {ImpliedExtsZvknhb}}, + {{"zvks"}, {ImpliedExtsZvks}}, + {{"zvksc"}, {ImpliedExtsZvksc}}, + {{"zvksg"}, {ImpliedExtsZvksg}}, {{"zvl1024b"}, {ImpliedExtsZvl1024b}}, {{"zvl128b"}, {ImpliedExtsZvl128b}}, {{"zvl16384b"}, {ImpliedExtsZvl16384b}}, @@ -903,6 +1103,13 @@ void RISCVISAInfo::updateImplication() { } } } + + // Add Zcf if Zce and F are enabled on RV32. 
+ if (XLen == 32 && Exts.count("zce") && Exts.count("f") && + !Exts.count("zcf")) { + auto Version = findDefaultVersion("zcf"); + addExtension("zcf", Version->Major, Version->Minor); + } } struct CombinedExtsEntry { @@ -914,6 +1121,12 @@ static constexpr CombinedExtsEntry CombineIntoExts[] = { {{"zk"}, {ImpliedExtsZk}}, {{"zkn"}, {ImpliedExtsZkn}}, {{"zks"}, {ImpliedExtsZks}}, + {{"zvkn"}, {ImpliedExtsZvkn}}, + {{"zvknc"}, {ImpliedExtsZvknc}}, + {{"zvkng"}, {ImpliedExtsZvkng}}, + {{"zvks"}, {ImpliedExtsZvks}}, + {{"zvksc"}, {ImpliedExtsZvksc}}, + {{"zvksg"}, {ImpliedExtsZvksg}}, }; void RISCVISAInfo::updateCombination() { @@ -999,6 +1212,8 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const { std::string ExtName = Ext.first; if (ExtName == "i") // i is not recognized in clang -cc1 continue; + if (!isSupportedExtension(ExtName)) + continue; std::string Feature = isExperimentalExtension(ExtName) ? "+experimental-" + ExtName : "+" + ExtName; @@ -1030,6 +1245,8 @@ StringRef RISCVISAInfo::computeDefaultABI() const { } else if (XLen == 64) { if (hasExtension("d")) return "lp64d"; + if (hasExtension("e")) + return "lp64e"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index 7a804a1a2297..dfbd373e4a98 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -14,14 +14,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "regex_impl.h" + #include <cassert> #include <string> -// Important this comes last because it defines "_REGEX_H_". At least on -// Darwin, if included before any header that (transitively) includes -// xlocale.h, this will cause trouble, because of missing regex-related types. 
-#include "regex_impl.h" - using namespace llvm; Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} diff --git a/llvm/lib/Support/ScaledNumber.cpp b/llvm/lib/Support/ScaledNumber.cpp index 54d4cc33410b..85d7afbea5c6 100644 --- a/llvm/lib/Support/ScaledNumber.cpp +++ b/llvm/lib/Support/ScaledNumber.cpp @@ -44,7 +44,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::multiply64(uint64_t LHS, return std::make_pair(Lower, 0); // Shift as little as possible to maximize precision. - unsigned LeadingZeros = countLeadingZeros(Upper); + unsigned LeadingZeros = llvm::countl_zero(Upper); int Shift = 64 - LeadingZeros; if (LeadingZeros) Upper = Upper << LeadingZeros | Lower >> Shift; @@ -62,7 +62,7 @@ std::pair<uint32_t, int16_t> ScaledNumbers::divide32(uint32_t Dividend, // Use 64-bit math and canonicalize the dividend to gain precision. uint64_t Dividend64 = Dividend; int Shift = 0; - if (int Zeros = countLeadingZeros(Dividend64)) { + if (int Zeros = llvm::countl_zero(Dividend64)) { Shift -= Zeros; Dividend64 <<= Zeros; } @@ -84,7 +84,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, // Minimize size of divisor. int Shift = 0; - if (int Zeros = countTrailingZeros(Divisor)) { + if (int Zeros = llvm::countr_zero(Divisor)) { Shift -= Zeros; Divisor >>= Zeros; } @@ -94,7 +94,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, return std::make_pair(Dividend, Shift); // Maximize size of dividend. 
- if (int Zeros = countLeadingZeros(Dividend)) { + if (int Zeros = llvm::countl_zero(Dividend)) { Shift -= Zeros; Dividend <<= Zeros; } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 0fb65accbf1d..64f66e0f8179 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -37,7 +37,6 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp, Strings[Regexp] = LineNumber; return true; } - Trigrams.insert(Regexp); // Replace * with .* for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos; @@ -61,8 +60,6 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const { auto It = Strings.find(Query); if (It != Strings.end()) return It->second; - if (Trigrams.isDefinitelyOut(Query)) - return false; for (const auto &RegExKV : RegExes) if (RegExKV.first->match(Query)) return RegExKV.second; @@ -175,7 +172,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, StringRef Category = SplitRegexp.second; // Create this section if it has not been seen before. - if (SectionsMap.find(Section) == SectionsMap.end()) { + if (!SectionsMap.contains(Section)) { std::unique_ptr<Matcher> M = std::make_unique<Matcher>(); std::string REError; if (!M->insert(std::string(Section), LineNo, REError)) { diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp index 9b2f96fca2cd..67c05a87959c 100644 --- a/llvm/lib/Support/StringMap.cpp +++ b/llvm/lib/Support/StringMap.cpp @@ -11,8 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringMap.h" -#include "llvm/Support/DJB.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ReverseIteration.h" +#include "llvm/Support/xxhash.h" using namespace llvm; @@ -84,7 +85,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { // Hash table unallocated so far? 
if (NumBuckets == 0) init(16); - unsigned FullHashValue = djbHash(Name, 0); + unsigned FullHashValue = xxh3_64bits(Name); + if (shouldReverseIterate()) + FullHashValue = ~FullHashValue; unsigned BucketNo = FullHashValue & (NumBuckets - 1); unsigned *HashTable = getHashTable(TheTable, NumBuckets); @@ -139,7 +142,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { int StringMapImpl::FindKey(StringRef Key) const { if (NumBuckets == 0) return -1; // Really empty table? - unsigned FullHashValue = djbHash(Key, 0); + unsigned FullHashValue = xxh3_64bits(Key); + if (shouldReverseIterate()) + FullHashValue = ~FullHashValue; unsigned BucketNo = FullHashValue & (NumBuckets - 1); unsigned *HashTable = getHashTable(TheTable, NumBuckets); diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index fb93940592c7..3cce83a982c4 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -191,7 +191,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { size_t StringRef::find_insensitive(StringRef Str, size_t From) const { StringRef This = substr(From); while (This.size() >= Str.size()) { - if (This.startswith_insensitive(Str)) + if (This.starts_with_insensitive(Str)) return From; This = This.drop_front(); ++From; @@ -509,7 +509,7 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, return !Str.empty(); } -bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { +bool StringRef::consumeInteger(unsigned Radix, APInt &Result) { StringRef Str = *this; // Autosense radix if not specified. @@ -529,6 +529,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { // If it was nothing but zeroes.... 
if (Str.empty()) { Result = APInt(64, 0); + *this = Str; return false; } @@ -561,12 +562,12 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { else if (Str[0] >= 'A' && Str[0] <= 'Z') CharVal = Str[0]-'A'+10; else - return true; + break; // If the parsed value is larger than the integer radix, the string is // invalid. if (CharVal >= Radix) - return true; + break; // Add in this character. if (IsPowerOf2Radix) { @@ -581,9 +582,25 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { Str = Str.substr(1); } + // We consider the operation a failure if no characters were consumed + // successfully. + if (size() == Str.size()) + return true; + + *this = Str; return false; } +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + if (Str.consumeInteger(Radix, Result)) + return true; + + // For getAsInteger, we require the whole string to be consumed or else we + // consider it a failure. + return !Str.empty(); +} + bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { APFloat F(0.0); auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven); diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp index 0d419f12cd1d..eaa653078e09 100644 --- a/llvm/lib/Support/SuffixTree.cpp +++ b/llvm/lib/Support/SuffixTree.cpp @@ -12,12 +12,22 @@ #include "llvm/Support/SuffixTree.h" #include "llvm/Support/Allocator.h" -#include <vector> +#include "llvm/Support/Casting.h" +#include "llvm/Support/SuffixTreeNode.h" using namespace llvm; -SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { - Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); +/// \returns the number of elements in the substring associated with \p N. 
+static size_t numElementsInSubstring(const SuffixTreeNode *N) { + assert(N && "Got a null node?"); + if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N)) + if (Internal->isRoot()) + return 0; + return N->getEndIdx() - N->getStartIdx() + 1; +} + +SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) { + Root = insertRoot(); Active.Node = Root; // Keep track of the number of suffixes we have to add of the current @@ -38,39 +48,38 @@ SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) { setSuffixIndices(); } -SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeNode &Parent, +SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent, unsigned StartIdx, unsigned Edge) { - assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); - - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr); + auto *N = new (LeafNodeAllocator.Allocate()) + SuffixTreeLeafNode(StartIdx, &LeafEndIdx); Parent.Children[Edge] = N; - return N; } -SuffixTreeNode *SuffixTree::insertInternalNode(SuffixTreeNode *Parent, - unsigned StartIdx, - unsigned EndIdx, unsigned Edge) { - +SuffixTreeInternalNode * +SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent, + unsigned StartIdx, unsigned EndIdx, + unsigned Edge) { assert(StartIdx <= EndIdx && "String can't start after it ends!"); - assert(!(!Parent && StartIdx != EmptyIdx) && + assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) && "Non-root internal nodes must have parents!"); - - unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); - SuffixTreeNode *N = - new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root); + auto *N = new (InternalNodeAllocator.Allocate()) + SuffixTreeInternalNode(StartIdx, EndIdx, Root); if (Parent) Parent->Children[Edge] = N; - return N; } +SuffixTreeInternalNode *SuffixTree::insertRoot() { + return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx, + SuffixTreeNode::EmptyIdx, 
/*Edge = */ 0); +} + void SuffixTree::setSuffixIndices() { // List of nodes we need to visit along with the current length of the // string. - std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; + SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; // Current node being visited. SuffixTreeNode *CurrNode = Root; @@ -81,21 +90,23 @@ void SuffixTree::setSuffixIndices() { while (!ToVisit.empty()) { std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); ToVisit.pop_back(); - CurrNode->ConcatLen = CurrNodeLen; - for (auto &ChildPair : CurrNode->Children) { - assert(ChildPair.second && "Node had a null child!"); - ToVisit.push_back( - {ChildPair.second, CurrNodeLen + ChildPair.second->size()}); - } - + // Length of the current node from the root down to here. + CurrNode->setConcatLen(CurrNodeLen); + if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) + for (auto &ChildPair : InternalNode->Children) { + assert(ChildPair.second && "Node had a null child!"); + ToVisit.push_back( + {ChildPair.second, + CurrNodeLen + numElementsInSubstring(ChildPair.second)}); + } // No children, so we are at the end of the string. - if (CurrNode->Children.size() == 0 && !CurrNode->isRoot()) - CurrNode->SuffixIdx = Str.size() - CurrNodeLen; + if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode)) + LeafNode->setSuffixIdx(Str.size() - CurrNodeLen); } } unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { - SuffixTreeNode *NeedsLink = nullptr; + SuffixTreeInternalNode *NeedsLink = nullptr; while (SuffixesToAdd > 0) { @@ -118,7 +129,7 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // The active node is an internal node, and we visited it, so it must // need a link if it doesn't have one. if (NeedsLink) { - NeedsLink->Link = Active.Node; + NeedsLink->setLink(Active.Node); NeedsLink = nullptr; } } else { @@ -126,16 +137,18 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // insert a new node. 
SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - unsigned SubstringLen = NextNode->size(); + unsigned SubstringLen = numElementsInSubstring(NextNode); // Is the current suffix we're trying to insert longer than the size of // the child we want to move to? if (Active.Len >= SubstringLen) { // If yes, then consume the characters we've seen and move to the next // node. + assert(isa<SuffixTreeInternalNode>(NextNode) && + "Expected an internal node?"); Active.Idx += SubstringLen; Active.Len -= SubstringLen; - Active.Node = NextNode; + Active.Node = cast<SuffixTreeInternalNode>(NextNode); continue; } @@ -144,12 +157,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { unsigned LastChar = Str[EndIdx]; // Is the string we're trying to insert a substring of the next node? - if (Str[NextNode->StartIdx + Active.Len] == LastChar) { + if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) { // If yes, then we're done for this step. Remember our insertion point // and move to the next end index. At this point, we have an implicit // suffix tree. if (NeedsLink && !Active.Node->isRoot()) { - NeedsLink->Link = Active.Node; + NeedsLink->setLink(Active.Node); NeedsLink = nullptr; } @@ -171,9 +184,9 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // n l // The node s from the diagram - SuffixTreeNode *SplitNode = - insertInternalNode(Active.Node, NextNode->StartIdx, - NextNode->StartIdx + Active.Len - 1, FirstChar); + SuffixTreeInternalNode *SplitNode = insertInternalNode( + Active.Node, NextNode->getStartIdx(), + NextNode->getStartIdx() + Active.Len - 1, FirstChar); // Insert the new node representing the new substring into the tree as // a child of the split node. This is the node l from the diagram. @@ -181,12 +194,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { // Make the old node a child of the split node and update its start // index. This is the node n from the diagram. 
- NextNode->StartIdx += Active.Len; - SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; + NextNode->incrementStartIdx(Active.Len); + SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode; // SplitNode is an internal node, update the suffix link. if (NeedsLink) - NeedsLink->Link = SplitNode; + NeedsLink->setLink(SplitNode); NeedsLink = SplitNode; } @@ -202,9 +215,68 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { } } else { // Start the next phase at the next smallest suffix. - Active.Node = Active.Node->Link; + Active.Node = Active.Node->getLink(); } } return SuffixesToAdd; } + +void SuffixTree::RepeatedSubstringIterator::advance() { + // Clear the current state. If we're at the end of the range, then this + // is the state we want to be in. + RS = RepeatedSubstring(); + N = nullptr; + + // Each leaf node represents a repeat of a string. + SmallVector<unsigned> RepeatedSubstringStarts; + + // Continue visiting nodes until we find one which repeats more than once. + while (!InternalNodesToVisit.empty()) { + RepeatedSubstringStarts.clear(); + auto *Curr = InternalNodesToVisit.back(); + InternalNodesToVisit.pop_back(); + + // Keep track of the length of the string associated with the node. If + // it's too short, we'll quit. + unsigned Length = Curr->getConcatLen(); + + // Iterate over each child, saving internal nodes for visiting, and + // leaf nodes in LeafChildren. Internal nodes represent individual + // strings, which may repeat. + for (auto &ChildPair : Curr->Children) { + // Save all of this node's children for processing. + if (auto *InternalChild = + dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) { + InternalNodesToVisit.push_back(InternalChild); + continue; + } + + if (Length < MinLength) + continue; + + // Have an occurrence of a potentially repeated string. Save it. 
+ auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second); + RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx()); + } + + // The root never represents a repeated substring. If we're looking at + // that, then skip it. + if (Curr->isRoot()) + continue; + + // Do we have any repeated substrings? + if (RepeatedSubstringStarts.size() < 2) + continue; + + // Yes. Update the state to reflect this, and then bail out. + N = Curr; + RS.Length = Length; + for (unsigned StartIdx : RepeatedSubstringStarts) + RS.StartIndices.push_back(StartIdx); + break; + } + // At this point, either NewRS is an empty RepeatedSubstring, or it was + // set in the above loop. Similarly, N is either nullptr, or the node + // associated with NewRS. +} diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp new file mode 100644 index 000000000000..113b990fd352 --- /dev/null +++ b/llvm/lib/Support/SuffixTreeNode.cpp @@ -0,0 +1,40 @@ +//===- llvm/ADT/SuffixTreeNode.cpp - Nodes for SuffixTrees --------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines nodes for use within a SuffixTree. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SuffixTreeNode.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +unsigned SuffixTreeNode::getStartIdx() const { return StartIdx; } +void SuffixTreeNode::incrementStartIdx(unsigned Inc) { StartIdx += Inc; } +void SuffixTreeNode::setConcatLen(unsigned Len) { ConcatLen = Len; } +unsigned SuffixTreeNode::getConcatLen() const { return ConcatLen; } + +bool SuffixTreeInternalNode::isRoot() const { + return getStartIdx() == EmptyIdx; +} +unsigned SuffixTreeInternalNode::getEndIdx() const { return EndIdx; } +void SuffixTreeInternalNode::setLink(SuffixTreeInternalNode *L) { + assert(L && "Cannot set a null link?"); + Link = L; +} +SuffixTreeInternalNode *SuffixTreeInternalNode::getLink() const { return Link; } + +unsigned SuffixTreeLeafNode::getEndIdx() const { + assert(EndIdx && "EndIdx is empty?"); + return *EndIdx; +} + +unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; } +void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; } diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp deleted file mode 100644 index 0082696038e3..000000000000 --- a/llvm/lib/Support/SymbolRemappingReader.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//===- SymbolRemappingReader.cpp - Read symbol remapping file -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains definitions needed for reading and applying symbol -// remapping files. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/SymbolRemappingReader.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -char SymbolRemappingParseError::ID; - -/// Load a set of name remappings from a text file. -/// -/// See the documentation at the top of the file for an explanation of -/// the expected format. -Error SymbolRemappingReader::read(MemoryBuffer &B) { - line_iterator LineIt(B, /*SkipBlanks=*/true, '#'); - - auto ReportError = [&](Twine Msg) { - return llvm::make_error<SymbolRemappingParseError>( - B.getBufferIdentifier(), LineIt.line_number(), Msg); - }; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef Line = *LineIt; - Line = Line.ltrim(' '); - // line_iterator only detects comments starting in column 1. - if (Line.startswith("#") || Line.empty()) - continue; - - SmallVector<StringRef, 4> Parts; - Line.split(Parts, ' ', /*MaxSplits*/-1, /*KeepEmpty*/false); - - if (Parts.size() != 3) - return ReportError("Expected 'kind mangled_name mangled_name', " - "found '" + Line + "'"); - - using FK = ItaniumManglingCanonicalizer::FragmentKind; - std::optional<FK> FragmentKind = StringSwitch<std::optional<FK>>(Parts[0]) - .Case("name", FK::Name) - .Case("type", FK::Type) - .Case("encoding", FK::Encoding) - .Default(std::nullopt); - if (!FragmentKind) - return ReportError("Invalid kind, expected 'name', 'type', or 'encoding'," - " found '" + Parts[0] + "'"); - - using EE = ItaniumManglingCanonicalizer::EquivalenceError; - switch (Canonicalizer.addEquivalence(*FragmentKind, Parts[1], Parts[2])) { - case EE::Success: - break; - - case EE::ManglingAlreadyUsed: - return ReportError("Manglings '" + Parts[1] + "' and '" + Parts[2] + "' " - "have both been used in prior remappings. 
Move this " - "remapping earlier in the file."); - - case EE::InvalidFirstMangling: - return ReportError("Could not demangle '" + Parts[1] + "' " - "as a <" + Parts[0] + ">; invalid mangling?"); - - case EE::InvalidSecondMangling: - return ReportError("Could not demangle '" + Parts[2] + "' " - "as a <" + Parts[0] + ">; invalid mangling?"); - } - } - - return Error::success(); -} diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 31461e31c65c..4eef339000e1 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/llvm-config.h" #if LLVM_ENABLE_THREADS +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Threading.h" #else #include "llvm/Support/raw_ostream.h" @@ -43,6 +44,7 @@ void ThreadPool::grow(int requested) { while (static_cast<int>(Threads.size()) < newThreadCount) { int ThreadID = Threads.size(); Threads.emplace_back([this, ThreadID] { + set_thread_name(formatv("llvm-worker-{0}", ThreadID)); Strategy.apply_thread_strategy(ThreadID); processTasks(nullptr); }); diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp index 923935bbca10..7cc7ba44cc72 100644 --- a/llvm/lib/Support/Threading.cpp +++ b/llvm/lib/Support/Threading.cpp @@ -83,6 +83,11 @@ unsigned llvm::ThreadPoolStrategy::compute_thread_count() const { // the same interface as std::thread but requests the same stack size as the // main thread (8MB) before creation. const std::optional<unsigned> llvm::thread::DefaultStackSize = 8 * 1024 * 1024; +#elif defined(_AIX) + // On AIX, the default pthread stack size limit is ~192k for 64-bit programs. + // This limit is easily reached when doing link-time thinLTO. AIX library + // developers have used 4MB, so we'll do the same. 
+const std::optional<unsigned> llvm::thread::DefaultStackSize = 4 * 1024 * 1024; #else const std::optional<unsigned> llvm::thread::DefaultStackSize; #endif diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp deleted file mode 100644 index 40a20ccc6583..000000000000 --- a/llvm/lib/Support/TrigramIndex.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// TrigramIndex implements a heuristic for SpecialCaseList that allows to -// filter out ~99% incoming queries when all regular expressions in the -// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more -// complicated, the check is defeated and it will always pass the queries to a -// full regex. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/TrigramIndex.h" -#include "llvm/ADT/StringRef.h" -#include <set> - -using namespace llvm; - -static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}"; - -static bool isAdvancedMetachar(unsigned Char) { - return strchr(RegexAdvancedMetachars, Char) != nullptr; -} - -void TrigramIndex::insert(const std::string &Regex) { - if (Defeated) return; - std::set<unsigned> Was; - unsigned Cnt = 0; - unsigned Tri = 0; - unsigned Len = 0; - bool Escaped = false; - for (unsigned Char : Regex) { - if (!Escaped) { - // Regular expressions allow escaping symbols by preceding it with '\'. - if (Char == '\\') { - Escaped = true; - continue; - } - if (isAdvancedMetachar(Char)) { - // This is a more complicated regex than we can handle here. - Defeated = true; - return; - } - if (Char == '.' 
|| Char == '*') { - Tri = 0; - Len = 0; - continue; - } - } - if (Escaped && Char >= '1' && Char <= '9') { - Defeated = true; - return; - } - // We have already handled escaping and can reset the flag. - Escaped = false; - Tri = ((Tri << 8) + Char) & 0xFFFFFF; - Len++; - if (Len < 3) - continue; - // We don't want the index to grow too much for the popular trigrams, - // as they are weak signals. It's ok to still require them for the - // rules we have already processed. It's just a small additional - // computational cost. - if (Index[Tri].size() >= 4) - continue; - Cnt++; - if (!Was.count(Tri)) { - // Adding the current rule to the index. - Index[Tri].push_back(Counts.size()); - Was.insert(Tri); - } - } - if (!Cnt) { - // This rule does not have remarkable trigrams to rely on. - // We have to always call the full regex chain. - Defeated = true; - return; - } - Counts.push_back(Cnt); -} - -bool TrigramIndex::isDefinitelyOut(StringRef Query) const { - if (Defeated) - return false; - std::vector<unsigned> CurCounts(Counts.size()); - unsigned Tri = 0; - for (size_t I = 0; I < Query.size(); I++) { - Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF; - if (I < 2) - continue; - const auto &II = Index.find(Tri); - if (II == Index.end()) - continue; - for (size_t J : II->second) { - CurCounts[J]++; - // If we have reached a desired limit, we have to look at the query - // more closely by running a full regex. - if (CurCounts[J] >= Counts[J]) - return false; - } - } - return true; -} diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc index 3efcad4f2bed..e2aece49cbc5 100644 --- a/llvm/lib/Support/Unix/Path.inc +++ b/llvm/lib/Support/Unix/Path.inc @@ -190,7 +190,7 @@ static char *getprogpath(char ret[PATH_MAX], const char *bin) { /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. 
-std::string getMainExecutableImpl(const char *argv0, void *MainAddr) { +std::string getMainExecutable(const char *argv0, void *MainAddr) { #if defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. Reading it // from there is much faster than calling dladdr, especially for large diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 05a7335216f4..fcf5701afcfd 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -62,6 +62,9 @@ #if HAVE_MACH_MACH_H #include <mach/mach.h> #endif +#ifdef __APPLE__ +#include <mach-o/dyld.h> +#endif #if HAVE_LINK_H #include <link.h> #endif @@ -84,13 +87,11 @@ static void InfoSignalHandler(int Sig); // defined below. using SignalHandlerFunctionType = void (*)(); /// The function to call if ctrl-c is pressed. -static std::atomic<SignalHandlerFunctionType> InterruptFunction = - ATOMIC_VAR_INIT(nullptr); -static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = - ATOMIC_VAR_INIT(nullptr); +static std::atomic<SignalHandlerFunctionType> InterruptFunction = nullptr; +static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = nullptr; /// The function to call on SIGPIPE (one-time use only). static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction = - ATOMIC_VAR_INIT(nullptr); + nullptr; namespace { /// Signal-safe removal of files. @@ -98,8 +99,8 @@ namespace { /// themselves is signal-safe. Memory is freed when the head is freed, deletion /// is therefore not signal-safe either. class FileToRemoveList { - std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr); - std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr); + std::atomic<char *> Filename = nullptr; + std::atomic<FileToRemoveList *> Next = nullptr; FileToRemoveList() = default; // Not signal-safe. 
@@ -188,7 +189,7 @@ public: Head.exchange(OldHead); } }; -static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr); +static std::atomic<FileToRemoveList *> FilesToRemove = nullptr; /// Clean up the list in a signal-friendly manner. /// Recall that signals can fire during llvm_shutdown. If this occurs we should @@ -248,7 +249,7 @@ static const int InfoSigs[] = {SIGUSR1 static const size_t NumSigs = std::size(IntSigs) + std::size(KillSigs) + std::size(InfoSigs) + 1 /* SIGPIPE */; -static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0); +static std::atomic<unsigned> NumRegisteredSignals = 0; static struct { struct sigaction SA; int SigNo; @@ -463,7 +464,7 @@ void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr, RegisterHandlers(); } -#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \ +#if ENABLE_BACKTRACES && defined(HAVE_BACKTRACE) && HAVE_LINK_H && \ (defined(__linux__) || defined(__FreeBSD__) || \ defined(__FreeBSD_kernel__) || defined(__NetBSD__)) struct DlIteratePhdrData { @@ -509,16 +510,50 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth, dl_iterate_phdr(dl_iterate_phdr_cb, &data); return true; } +#elif ENABLE_BACKTRACES && defined(__APPLE__) && defined(__LP64__) +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + uint32_t NumImgs = _dyld_image_count(); + for (uint32_t ImageIndex = 0; ImageIndex < NumImgs; ImageIndex++) { + const char *Name = _dyld_get_image_name(ImageIndex); + intptr_t Slide = _dyld_get_image_vmaddr_slide(ImageIndex); + auto *Header = + (const struct mach_header_64 *)_dyld_get_image_header(ImageIndex); + if (Header == NULL) + continue; + auto Cmd = (const struct load_command *)(&Header[1]); + for (uint32_t CmdNum = 0; CmdNum < Header->ncmds; ++CmdNum) { + uint32_t BaseCmd = Cmd->cmd & ~LC_REQ_DYLD; + if (BaseCmd == LC_SEGMENT_64) { + auto 
CmdSeg64 = (const struct segment_command_64 *)Cmd; + for (int j = 0; j < Depth; j++) { + if (Modules[j]) + continue; + intptr_t Addr = (intptr_t)StackTrace[j]; + if ((intptr_t)CmdSeg64->vmaddr + Slide <= Addr && + Addr < intptr_t(CmdSeg64->vmaddr + CmdSeg64->vmsize + Slide)) { + Modules[j] = Name; + Offsets[j] = Addr - Slide; + } + } + } + Cmd = (const load_command *)(((const char *)Cmd) + (Cmd->cmdsize)); + } + } + return true; +} #else -/// This platform does not have dl_iterate_phdr, so we do not yet know how to -/// find all loaded DSOs. +/// Backtraces are not enabled or we don't yet know how to find all loaded DSOs +/// on this platform. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, const char *MainExecutableName, StringSaver &StrPool) { return false; } -#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ... +#endif // ENABLE_BACKTRACES && ... (findModulesAndOffsets variants) #if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE) static int unwindBacktrace(void **StackTrace, int MaxEntries) { @@ -617,13 +652,12 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) { if (dlinfo.dli_sname != nullptr) { OS << ' '; - int res; - char *d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res); - if (!d) - OS << dlinfo.dli_sname; - else + if (char *d = itaniumDemangle(dlinfo.dli_sname)) { OS << d; - free(d); + free(d); + } else { + OS << dlinfo.dli_sname; + } OS << format(" + %tu", (static_cast<const char *>(StackTrace[i]) - static_cast<const char *>(dlinfo.dli_saddr))); diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index a167e0a76795..d381d79fba96 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -43,6 +43,7 @@ #include <cstdint> #include <iterator> #include <limits> +#include <map> #include <memory> #include <optional> #include <string> @@ -257,12 +258,12 @@ public: explicit 
RealFileSystem(bool LinkCWDToProcess) { if (!LinkCWDToProcess) { SmallString<128> PWD, RealPWD; - if (llvm::sys::fs::current_path(PWD)) - return; // Awful, but nothing to do here. - if (llvm::sys::fs::real_path(PWD, RealPWD)) - WD = {PWD, PWD}; + if (std::error_code EC = llvm::sys::fs::current_path(PWD)) + WD = EC; + else if (llvm::sys::fs::real_path(PWD, RealPWD)) + WD = WorkingDirectory{PWD, PWD}; else - WD = {PWD, RealPWD}; + WD = WorkingDirectory{PWD, RealPWD}; } } @@ -284,10 +285,10 @@ private: // If this FS has its own working dir, use it to make Path absolute. // The returned twine is safe to use as long as both Storage and Path live. Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const { - if (!WD) + if (!WD || !*WD) return Path; Path.toVector(Storage); - sys::fs::make_absolute(WD->Resolved, Storage); + sys::fs::make_absolute(WD->get().Resolved, Storage); return Storage; } @@ -297,7 +298,7 @@ private: // The current working directory, with links resolved. (readlink .). 
SmallString<128> Resolved; }; - std::optional<WorkingDirectory> WD; + std::optional<llvm::ErrorOr<WorkingDirectory>> WD; }; } // namespace @@ -323,8 +324,10 @@ RealFileSystem::openFileForRead(const Twine &Name) { } llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const { + if (WD && *WD) + return std::string(WD->get().Specified.str()); if (WD) - return std::string(WD->Specified.str()); + return WD->getError(); SmallString<128> Dir; if (std::error_code EC = llvm::sys::fs::current_path(Dir)) @@ -345,7 +348,7 @@ std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) { return std::make_error_code(std::errc::not_a_directory); if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved)) return Err; - WD = {Absolute, Resolved}; + WD = WorkingDirectory{Absolute, Resolved}; return std::error_code(); } @@ -723,7 +726,7 @@ public: class InMemoryDirectory : public InMemoryNode { Status Stat; - llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries; + std::map<std::string, std::unique_ptr<InMemoryNode>> Entries; public: InMemoryDirectory(Status Stat) @@ -739,15 +742,14 @@ public: UniqueID getUniqueID() const { return Stat.getUniqueID(); } InMemoryNode *getChild(StringRef Name) const { - auto I = Entries.find(Name); + auto I = Entries.find(Name.str()); if (I != Entries.end()) return I->second.get(); return nullptr; } InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) { - return Entries.insert(make_pair(Name, std::move(Child))) - .first->second.get(); + return Entries.emplace(Name, std::move(Child)).first->second.get(); } using const_iterator = decltype(Entries)::const_iterator; @@ -2237,6 +2239,14 @@ RedirectingFileSystem::LookupResult::LookupResult( } } +void RedirectingFileSystem::LookupResult::getPath( + llvm::SmallVectorImpl<char> &Result) const { + Result.clear(); + for (Entry *Parent : Parents) + llvm::sys::path::append(Result, Parent->getName()); + llvm::sys::path::append(Result, E->getName()); +} + 
std::error_code RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const { if (std::error_code EC = makeAbsolute(Path)) @@ -2255,11 +2265,14 @@ ErrorOr<RedirectingFileSystem::LookupResult> RedirectingFileSystem::lookupPath(StringRef Path) const { sys::path::const_iterator Start = sys::path::begin(Path); sys::path::const_iterator End = sys::path::end(Path); + llvm::SmallVector<Entry *, 32> Entries; for (const auto &Root : Roots) { ErrorOr<RedirectingFileSystem::LookupResult> Result = - lookupPathImpl(Start, End, Root.get()); - if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) + lookupPathImpl(Start, End, Root.get(), Entries); + if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) { + Result->Parents = std::move(Entries); return Result; + } } return make_error_code(llvm::errc::no_such_file_or_directory); } @@ -2267,7 +2280,8 @@ RedirectingFileSystem::lookupPath(StringRef Path) const { ErrorOr<RedirectingFileSystem::LookupResult> RedirectingFileSystem::lookupPathImpl( sys::path::const_iterator Start, sys::path::const_iterator End, - RedirectingFileSystem::Entry *From) const { + RedirectingFileSystem::Entry *From, + llvm::SmallVectorImpl<Entry *> &Entries) const { assert(!isTraversalComponent(*Start) && !isTraversalComponent(From->getName()) && "Paths should not contain traversal components"); @@ -2296,10 +2310,12 @@ RedirectingFileSystem::lookupPathImpl( auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(From); for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry : llvm::make_range(DE->contents_begin(), DE->contents_end())) { + Entries.push_back(From); ErrorOr<RedirectingFileSystem::LookupResult> Result = - lookupPathImpl(Start, End, DirEntry.get()); + lookupPathImpl(Start, End, DirEntry.get(), Entries); if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) return Result; + Entries.pop_back(); } return make_error_code(llvm::errc::no_such_file_or_directory); @@ -2541,10 
+2557,12 @@ RedirectingFileSystem::getRealPath(const Twine &OriginalPath, return P; } - // If we found a DirectoryEntry, still fallthrough to the original path if - // allowed, because directories don't have a single external contents path. - if (Redirection == RedirectKind::Fallthrough) - return ExternalFS->getRealPath(CanonicalPath, Output); + // We found a DirectoryEntry, which does not have a single external contents + // path. Use the canonical virtual path. + if (Redirection == RedirectKind::Fallthrough) { + Result->getPath(Output); + return {}; + } return llvm::errc::invalid_argument; } diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 92cf4fcda5a6..b949b724509f 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -130,7 +130,7 @@ namespace fs { const file_t kInvalidFile = INVALID_HANDLE_VALUE; -std::string getMainExecutableImpl(const char *argv0, void *MainExecAddr) { +std::string getMainExecutable(const char *argv0, void *MainExecAddr) { SmallVector<wchar_t, MAX_PATH> PathName; PathName.resize_for_overwrite(PathName.capacity()); DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.size()); @@ -650,8 +650,6 @@ bool equivalent(file_status A, file_status B) { return A.FileIndexHigh == B.FileIndexHigh && A.FileIndexLow == B.FileIndexLow && A.FileSizeHigh == B.FileSizeHigh && A.FileSizeLow == B.FileSizeLow && - A.LastAccessedTimeHigh == B.LastAccessedTimeHigh && - A.LastAccessedTimeLow == B.LastAccessedTimeLow && A.LastWriteTimeHigh == B.LastWriteTimeHigh && A.LastWriteTimeLow == B.LastWriteTimeLow && A.VolumeSerialNumber == B.VolumeSerialNumber; diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index ba93afe0803b..cb82f55fc38b 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// #include 
"llvm/Support/ConvertUTF.h" +#include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" @@ -204,6 +205,9 @@ static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; +/// The function to call on "SIGPIPE" (one-time use only). +static std::atomic<void (*)()> OneShotPipeSignalFunction(nullptr); + // Windows creates a new thread to execute the console handler when an event // (such as CTRL/C) occurs. This causes concurrency issues with the above // globals which this critical section addresses. @@ -575,11 +579,16 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) { } void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) { - // Unimplemented. + OneShotPipeSignalFunction.exchange(Handler); } void llvm::sys::DefaultOneShotPipeSignalHandler() { - // Unimplemented. + llvm::sys::Process::Exit(EX_IOERR, /*NoCleanup=*/true); +} + +void llvm::sys::CallOneShotPipeSignalHandler() { + if (auto OldOneShotPipeFunction = OneShotPipeSignalFunction.exchange(nullptr)) + OldOneShotPipeFunction(); } /// Add a function to be called when a signal is delivered to the process. The @@ -816,7 +825,15 @@ WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) { } void sys::CleanupOnSignal(uintptr_t Context) { - LLVMUnhandledExceptionFilter((LPEXCEPTION_POINTERS)Context); + LPEXCEPTION_POINTERS EP = (LPEXCEPTION_POINTERS)Context; + // Broken pipe is not a crash. + // + // 0xE0000000 is combined with the return code in the exception raised in + // CrashRecoveryContext::HandleExit(). 
+ unsigned RetCode = EP->ExceptionRecord->ExceptionCode; + if (RetCode == (0xE0000000 | EX_IOERR)) + return; + LLVMUnhandledExceptionFilter(EP); } static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc index aa47484cb5ce..4baf8b8cb82a 100644 --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -233,7 +233,7 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() { unsigned CurrentGroupID = (*ActiveGroups)[0]; ProcessorGroup NewG{Groups[CurrentGroupID]}; NewG.Affinity = ProcessAffinityMask; - NewG.UsableThreads = countPopulation(ProcessAffinityMask); + NewG.UsableThreads = llvm::popcount(ProcessAffinityMask); Groups.clear(); Groups.push_back(NewG); } diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp index b85b1eb83ef8..6ac2c6aeeb46 100644 --- a/llvm/lib/Support/YAMLParser.cpp +++ b/llvm/lib/Support/YAMLParser.cpp @@ -2041,8 +2041,11 @@ StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { } return UnquotedValue; } - // Plain or block. - return Value.rtrim(' '); + // Plain. + // Trim whitespace ('b-char' and 's-white'). + // NOTE: Alternatively we could change the scanner to not include whitespace + // here in the first place. 
+ return Value.rtrim("\x0A\x0D\x20\x09"); } StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp index 4eb0b3afd563..f21b7a0ca699 100644 --- a/llvm/lib/Support/YAMLTraits.cpp +++ b/llvm/lib/Support/YAMLTraits.cpp @@ -397,17 +397,23 @@ void Input::reportWarning(const SMRange &range, const Twine &message) { std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { SmallString<128> StringStorage; - if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) { + switch (N->getType()) { + case Node::NK_Scalar: { + ScalarNode *SN = dyn_cast<ScalarNode>(N); StringRef KeyStr = SN->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } return std::make_unique<ScalarHNode>(N, KeyStr); - } else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) { + } + case Node::NK_BlockScalar: { + BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N); StringRef ValueCopy = BSN->getValue().copy(StringAllocator); return std::make_unique<ScalarHNode>(N, ValueCopy); - } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) { + } + case Node::NK_Sequence: { + SequenceNode *SQ = dyn_cast<SequenceNode>(N); auto SQHNode = std::make_unique<SequenceHNode>(N); for (Node &SN : *SQ) { auto Entry = createHNodes(&SN); @@ -416,7 +422,9 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { SQHNode->Entries.push_back(std::move(Entry)); } return std::move(SQHNode); - } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) { + } + case Node::NK_Mapping: { + MappingNode *Map = dyn_cast<MappingNode>(N); auto mapHNode = std::make_unique<MapHNode>(N); for (KeyValueNode &KVN : *Map) { Node *KeyNode = KVN.getKey(); @@ -435,6 +443,11 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { // Copy string to permanent storage KeyStr = StringStorage.str().copy(StringAllocator); } + if (mapHNode->Mapping.count(KeyStr)) + // From 
YAML spec: "The content of a mapping node is an unordered set of + // key/value node pairs, with the restriction that each of the keys is + // unique." + setError(KeyNode, Twine("duplicated mapping key '") + KeyStr + "'"); auto ValueHNode = createHNodes(Value); if (EC) break; @@ -442,9 +455,10 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) { std::make_pair(std::move(ValueHNode), KeyNode->getSourceRange()); } return std::move(mapHNode); - } else if (isa<NullNode>(N)) { + } + case Node::NK_Null: return std::make_unique<EmptyHNode>(N); - } else { + default: setError(N, "unknown node kind"); return nullptr; } diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp index a49bedcfd2b0..eb671fe2596d 100644 --- a/llvm/lib/Support/Z3Solver.cpp +++ b/llvm/lib/Support/Z3Solver.cpp @@ -729,7 +729,7 @@ public: const Z3_sort Z3Sort = toZ3Sort(*getBitvectorSort(BitWidth)).Sort; // Slow path, when 64 bits are not enough. - if (LLVM_UNLIKELY(Int.getBitWidth() > 64u)) { + if (LLVM_UNLIKELY(!Int.isRepresentableByInt64())) { SmallString<40> Buffer; Int.toString(Buffer, 10); return newExprRef(Z3Expr( diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 92b15f14c62f..a4fc605019c2 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -56,6 +56,7 @@ #ifdef _WIN32 #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/Windows/WindowsSupport.h" #endif @@ -83,8 +84,15 @@ raw_ostream::~raw_ostream() { } size_t raw_ostream::preferred_buffer_size() const { +#ifdef _WIN32 + // On Windows BUFSIZ is only 512 which results in more calls to write. This + // overhead can cause significant performance degradation. Therefore use a + // better default. + return (16 * 1024); +#else // BUFSIZ is intended to be a reasonable default. 
return BUFSIZ; +#endif } void raw_ostream::SetBuffered() { @@ -775,6 +783,15 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { ) continue; +#ifdef _WIN32 + // Windows equivalents of SIGPIPE/EPIPE. + DWORD WinLastError = GetLastError(); + if (WinLastError == ERROR_BROKEN_PIPE || + (WinLastError == ERROR_NO_DATA && errno == EINVAL)) { + llvm::sys::CallOneShotPipeSignalHandler(); + errno = EPIPE; + } +#endif // Otherwise it's a non-recoverable error. Note it and quit. error_detected(std::error_code(errno, std::generic_category())); break; @@ -802,8 +819,6 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); #ifdef _WIN32 pos = ::_lseeki64(FD, off, SEEK_SET); -#elif defined(HAVE_LSEEK64) - pos = ::lseek64(FD, off, SEEK_SET); #else pos = ::lseek(FD, off, SEEK_SET); #endif @@ -992,7 +1007,7 @@ Error llvm::writeToOutput(StringRef OutputFileName, return Write(Out); } - unsigned Mode = sys::fs::all_read | sys::fs::all_write | sys::fs::all_exe; + unsigned Mode = sys::fs::all_read | sys::fs::all_write; Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create(OutputFileName + ".temp-stream-%%%%%%", Mode); if (!Temp) diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c index 9d484195a6d6..4e9082cec456 100644 --- a/llvm/lib/Support/regcomp.c +++ b/llvm/lib/Support/regcomp.c @@ -329,7 +329,15 @@ llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) /* set things up */ p->g = g; + /* suppress warning from the following explicit cast. 
*/ +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif /* __GNUC__ */ p->next = (char *)pattern; /* convenience; we do not modify it */ +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif /* __GNUC__ */ p->end = p->next + len; p->error = 0; p->ncsalloc = 0; diff --git a/llvm/lib/Support/regex_impl.h b/llvm/lib/Support/regex_impl.h index 8ddac7dcf998..8f0c532205ed 100644 --- a/llvm/lib/Support/regex_impl.h +++ b/llvm/lib/Support/regex_impl.h @@ -35,8 +35,8 @@ * @(#)regex.h 8.1 (Berkeley) 6/2/93 */ -#ifndef _REGEX_H_ -#define _REGEX_H_ +#ifndef LLVM_SUPPORT_REGEX_IMPL_H +#define LLVM_SUPPORT_REGEX_IMPL_H #include <sys/types.h> typedef off_t llvm_regoff_t; @@ -105,4 +105,4 @@ size_t llvm_strlcpy(char *dst, const char *src, size_t siz); } #endif -#endif /* !_REGEX_H_ */ +#endif /* LLVM_SUPPORT_REGEX_IMPL_H */ diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp index 9a3f5faa336b..577f14189caf 100644 --- a/llvm/lib/Support/xxhash.cpp +++ b/llvm/lib/Support/xxhash.cpp @@ -1,6 +1,6 @@ /* * xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet +* Copyright (C) 2012-2021, Yann Collet * * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) * @@ -32,10 +32,14 @@ * - xxHash source repository : https://github.com/Cyan4973/xxHash */ -/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed - * everything but a simple interface for computing XXh64. */ +// xxhash64 is based on commit d2df04efcbef7d7f6886d345861e5dfda4edacc1. Removed +// everything but a simple interface for computing xxh64. + +// xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July +// 2023). 
#include "llvm/Support/xxhash.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include <stdlib.h> @@ -47,6 +51,10 @@ static uint64_t rotl64(uint64_t X, size_t R) { return (X << R) | (X >> (64 - R)); } +constexpr uint32_t PRIME32_1 = 0x9E3779B1; +constexpr uint32_t PRIME32_2 = 0x85EBCA77; +constexpr uint32_t PRIME32_3 = 0xC2B2AE3D; + static const uint64_t PRIME64_1 = 11400714785074694791ULL; static const uint64_t PRIME64_2 = 14029467366897019727ULL; static const uint64_t PRIME64_3 = 1609587929392839161ULL; @@ -67,6 +75,15 @@ static uint64_t mergeRound(uint64_t Acc, uint64_t Val) { return Acc; } +static uint64_t XXH64_avalanche(uint64_t hash) { + hash ^= hash >> 33; + hash *= PRIME64_2; + hash ^= hash >> 29; + hash *= PRIME64_3; + hash ^= hash >> 32; + return hash; +} + uint64_t llvm::xxHash64(StringRef Data) { size_t Len = Data.size(); uint64_t Seed = 0; @@ -104,14 +121,15 @@ uint64_t llvm::xxHash64(StringRef Data) { H64 += (uint64_t)Len; - while (P + 8 <= BEnd) { + while (reinterpret_cast<uintptr_t>(P) + 8 <= + reinterpret_cast<uintptr_t>(BEnd)) { uint64_t const K1 = round(0, endian::read64le(P)); H64 ^= K1; H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4; P += 8; } - if (P + 4 <= BEnd) { + if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) { H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1; H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3; P += 4; @@ -123,15 +141,267 @@ uint64_t llvm::xxHash64(StringRef Data) { P++; } - H64 ^= H64 >> 33; - H64 *= PRIME64_2; - H64 ^= H64 >> 29; - H64 *= PRIME64_3; - H64 ^= H64 >> 32; - - return H64; + return XXH64_avalanche(H64); } uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) { return xxHash64({(const char *)Data.data(), Data.size()}); } + +constexpr size_t XXH3_SECRETSIZE_MIN = 136; +constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192; + +/* Pseudorandom data taken directly from FARSH */ +// clang-format off +constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 
0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; +// clang-format on + +constexpr uint64_t PRIME_MX1 = 0x165667919E3779F9; +constexpr uint64_t PRIME_MX2 = 0x9FB21C651E98DF25; + +// Calculates a 64-bit to 128-bit multiply, then XOR folds it. +static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) { +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; + return uint64_t(product) ^ uint64_t(product >> 64); + +#else + /* First calculate all of the cross products. */ + const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF); + const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF); + const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32); + const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32); + + /* Now add the products together. These will never overflow. 
*/ + const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + return upper ^ lower; +#endif +} + +constexpr size_t XXH_STRIPE_LEN = 64; +constexpr size_t XXH_SECRET_CONSUME_RATE = 8; +constexpr size_t XXH_ACC_NB = XXH_STRIPE_LEN / sizeof(uint64_t); + +static uint64_t XXH3_avalanche(uint64_t hash) { + hash ^= hash >> 37; + hash *= PRIME_MX1; + hash ^= hash >> 32; + return hash; +} + +static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + const uint8_t c1 = input[0]; + const uint8_t c2 = input[len >> 1]; + const uint8_t c3 = input[len - 1]; + uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | + ((uint32_t)c3 << 0) | ((uint32_t)len << 8); + uint64_t bitflip = + (uint64_t)(endian::read32le(secret) ^ endian::read32le(secret + 4)) + + seed; + return XXH64_avalanche(uint64_t(combined) ^ bitflip); +} + +static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + seed ^= (uint64_t)byteswap(uint32_t(seed)) << 32; + const uint32_t input1 = endian::read32le(input); + const uint32_t input2 = endian::read32le(input + len - 4); + uint64_t acc = + (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed; + const uint64_t input64 = (uint64_t)input2 | ((uint64_t)input1 << 32); + acc ^= input64; + // XXH3_rrmxmx(acc, len) + acc ^= rotl64(acc, 49) ^ rotl64(acc, 24); + acc *= PRIME_MX2; + acc ^= (acc >> 35) + (uint64_t)len; + acc *= PRIME_MX2; + return acc ^ (acc >> 28); +} + +static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t const seed) { + uint64_t input_lo = + (endian::read64le(secret + 24) ^ endian::read64le(secret + 32)) + seed; + uint64_t input_hi = + (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed; + input_lo ^= 
endian::read64le(input); + input_hi ^= endian::read64le(input + len - 8); + uint64_t acc = uint64_t(len) + byteswap(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t const seed) { + if (LLVM_LIKELY(len > 8)) + return XXH3_len_9to16_64b(input, len, secret, seed); + if (LLVM_LIKELY(len >= 4)) + return XXH3_len_4to8_64b(input, len, secret, seed); + if (len != 0) + return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ endian::read64le(secret + 56) ^ + endian::read64le(secret + 64)); +} + +static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret, + uint64_t seed) { + uint64_t lhs = seed; + uint64_t rhs = 0U - seed; + lhs += endian::read64le(secret); + rhs += endian::read64le(secret + 8); + lhs ^= endian::read64le(input); + rhs ^= endian::read64le(input + 8); + return XXH3_mul128_fold64(lhs, rhs); +} + +/* For mid range keys, XXH3 uses a Mum-hash variant. 
*/ +LLVM_ATTRIBUTE_ALWAYS_INLINE +static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len, + const uint8_t *secret, + uint64_t const seed) { + uint64_t acc = len * PRIME64_1, acc_end; + acc += XXH3_mix16B(input + 0, secret + 0, seed); + acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed); + if (len > 32) { + acc += XXH3_mix16B(input + 16, secret + 32, seed); + acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed); + if (len > 64) { + acc += XXH3_mix16B(input + 32, secret + 64, seed); + acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed); + if (len > 96) { + acc += XXH3_mix16B(input + 48, secret + 96, seed); + acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed); + } + } + } + return XXH3_avalanche(acc + acc_end); +} + +constexpr size_t XXH3_MIDSIZE_MAX = 240; + +LLVM_ATTRIBUTE_NOINLINE +static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len, + const uint8_t *secret, uint64_t seed) { + constexpr size_t XXH3_MIDSIZE_STARTOFFSET = 3; + constexpr size_t XXH3_MIDSIZE_LASTOFFSET = 17; + uint64_t acc = (uint64_t)len * PRIME64_1; + const unsigned nbRounds = len / 16; + for (unsigned i = 0; i < 8; ++i) + acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed); + acc = XXH3_avalanche(acc); + + for (unsigned i = 8; i < nbRounds; ++i) { + acc += XXH3_mix16B(input + 16 * i, + secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + /* last bytes */ + acc += + XXH3_mix16B(input + len - 16, + secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + return XXH3_avalanche(acc); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input, + const uint8_t *secret) { + for (size_t i = 0; i < XXH_ACC_NB; ++i) { + uint64_t data_val = endian::read64le(input + 8 * i); + uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i); + acc[i ^ 1] += data_val; + acc[i] += uint32_t(data_key) * (data_key >> 32); + } +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE +static void 
XXH3_accumulate_scalar(uint64_t *acc, const uint8_t *input, + const uint8_t *secret, size_t nbStripes) { + for (size_t n = 0; n < nbStripes; ++n) + XXH3_accumulate_512_scalar(acc, input + n * XXH_STRIPE_LEN, + secret + n * XXH_SECRET_CONSUME_RATE); +} + +static void XXH3_scrambleAcc(uint64_t *acc, const uint8_t *secret) { + for (size_t i = 0; i < XXH_ACC_NB; ++i) { + acc[i] ^= acc[i] >> 47; + acc[i] ^= endian::read64le(secret + 8 * i); + acc[i] *= PRIME32_1; + } +} + +static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret) { + return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret), + acc[1] ^ endian::read64le(secret + 8)); +} + +static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key, + uint64_t start) { + uint64_t result64 = start; + for (size_t i = 0; i < 4; ++i) + result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i); + return XXH3_avalanche(result64); +} + +LLVM_ATTRIBUTE_NOINLINE +static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len, + const uint8_t *secret, size_t secretSize) { + const size_t nbStripesPerBlock = + (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + const size_t nb_blocks = (len - 1) / block_len; + alignas(16) uint64_t acc[XXH_ACC_NB] = { + PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, + }; + for (size_t n = 0; n < nb_blocks; ++n) { + XXH3_accumulate_scalar(acc, input + n * block_len, secret, + nbStripesPerBlock); + XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE); + XXH3_accumulate_scalar(acc, input + nb_blocks * block_len, secret, nbStripes); + + /* last stripe */ + constexpr size_t XXH_SECRET_LASTACC_START = 7; + XXH3_accumulate_512_scalar(acc, input + len - XXH_STRIPE_LEN, + secret + secretSize 
- XXH_STRIPE_LEN - + XXH_SECRET_LASTACC_START); + + /* converge into final hash */ + constexpr size_t XXH_SECRET_MERGEACCS_START = 11; + return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, + (uint64_t)len * PRIME64_1); +} + +uint64_t llvm::xxh3_64bits(ArrayRef<uint8_t> data) { + auto *in = data.data(); + size_t len = data.size(); + if (len <= 16) + return XXH3_len_0to16_64b(in, len, kSecret, 0); + if (len <= 128) + return XXH3_len_17to128_64b(in, len, kSecret, 0); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b(in, len, kSecret, 0); + return XXH3_hashLong_64b(in, len, kSecret, sizeof(kSecret)); +} |