author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-03 20:26:11 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-03 20:26:11 +0000
commit | 148779df305667b6942fee7e758fdf81a6498f38 |
tree | 976d85fb9cb4bc8ed54348b045f742be90e10c57 |
parent | a303c417bbdb53703c2c17398b08486bde78f1f6 |
Vendor import of llvm trunk r302069 (tag: vendor/llvm/llvm-trunk-r302069)
Notes:
svn path=/vendor/llvm/dist/; revision=317760
svn path=/vendor/llvm/llvm-trunk-r302069/; revision=317761; tag=vendor/llvm/llvm-trunk-r302069
225 files changed, 5680 insertions, 2657 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f5df776e9e0..78e2e0166257 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -530,6 +530,8 @@ if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB)
 endif()
 option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default})
+option(LLVM_DYLIB_SYMBOL_VERSIONING OFF)
+
 option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF)
 
 if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES)))
   set(LLVM_USE_HOST_TOOLS ON)
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index bf4973ca9aed..dad99e3352dd 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1539,7 +1539,7 @@ example:
     This function attribute indicates that the function does not have any
     effects besides calculating its result and does not have undefined behavior.
     Note that ``speculatable`` is not enough to conclude that along any
-    particular exection path the number of calls to this function will not be
+    particular execution path the number of calls to this function will not be
     externally observable. This attribute is only valid on functions
     and declarations, not on individual call sites. If a function is
     incorrectly marked as speculatable and really does exhibit
@@ -7915,7 +7915,7 @@ makes sense:
      ; get pointers for 8 elements from array B
      %ptrs = getelementptr double, double* %B, <8 x i32> %C
      ; load 8 elements from array B into A
-     %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
+     %A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
           i32 8, <8 x i1> %mask, <8 x double> %passthru)
 
 Conversion Operations
@@ -12024,9 +12024,9 @@ This is an overloaded intrinsic. The loaded data are multiple scalar values of a
 
 ::
 
-      declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
-      declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
-      declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1> <mask>, <8 x float*> <passthru>)
+      declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32 (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+      declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64 (<2 x double addrspace(1)*> <ptrs>, i32 <alignment>, <2 x i1> <mask>, <2 x double> <passthru>)
+      declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1> <mask>, <8 x float*> <passthru>)
 
 Overview:
 """""""""
@@ -12049,7 +12049,7 @@ The semantics of this operation are equivalent to a sequence of conditional scal
 
 ::
 
-       %res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+       %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
 
        ;; The gather with all-true mask is equivalent to the following instruction sequence
        %ptr0 = extractelement <4 x double*> %ptrs, i32 0
@@ -12078,9 +12078,9 @@ This is an overloaded intrinsic.
The data stored in memory is a vector of any in :: - declare void @llvm.masked.scatter.v8i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>) - declare void @llvm.masked.scatter.v16f32 (<16 x float> <value>, <16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>) - declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1> <mask>) + declare void @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> <value>, <8 x i32*> <ptrs>, i32 <alignment>, <8 x i1> <mask>) + declare void @llvm.masked.scatter.v16f32.v16p1f32 (<16 x float> <value>, <16 x float addrspace(1)*> <ptrs>, i32 <alignment>, <16 x i1> <mask>) + declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1> <mask>) Overview: """"""""" @@ -12101,7 +12101,7 @@ The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector :: ;; This instruction unconditionally stores data vector in multiple addresses - call @llvm.masked.scatter.v8i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> <true, true, .. true>) + call @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> <true, true, .. true>) ;; It is equivalent to a list of scalar stores %val0 = extractelement <8 x i32> %value, i32 0 diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 6d74f344aad5..63c92c1a7fce 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -86,7 +86,7 @@ private: union { uint64_t VAL; ///< Used to store the <= 64 bits integer value. uint64_t *pVal; ///< Used to store the >64 bits integer value. - }; + } U; unsigned BitWidth; ///< The number of bits in this APInt. @@ -98,7 +98,9 @@ private: /// /// This constructor is used only internally for speed of construction of /// temporaries. It is unsafe for general use so it is not public. - APInt(uint64_t *val, unsigned bits) : pVal(val), BitWidth(bits) {} + APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { + U.pVal = val; + } /// \brief Determine if this APInt just has one word to store value. /// @@ -143,16 +145,16 @@ private: // Mask out the high bits. uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits); if (isSingleWord()) - VAL &= mask; + U.VAL &= mask; else - pVal[getNumWords() - 1] &= mask; + U.pVal[getNumWords() - 1] &= mask; return *this; } /// \brief Get the word corresponding to a bit position /// \returns the corresponding word for the specified bit position. uint64_t getWord(unsigned bitPosition) const { - return isSingleWord() ? VAL : pVal[whichWord(bitPosition)]; + return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; } /// \brief Convert a char array into an APInt @@ -258,7 +260,7 @@ public: : BitWidth(numBits) { assert(BitWidth && "bitwidth too small"); if (isSingleWord()) { - VAL = val; + U.VAL = val; clearUnusedBits(); } else { initSlowCase(val, isSigned); @@ -300,20 +302,21 @@ public: /// @brief Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { if (isSingleWord()) - VAL = that.VAL; + U.VAL = that.U.VAL; else initSlowCase(that); } /// \brief Move Constructor. - APInt(APInt &&that) : VAL(that.VAL), BitWidth(that.BitWidth) { + APInt(APInt &&that) : BitWidth(that.BitWidth) { + memcpy(&U, &that.U, sizeof(U)); that.BitWidth = 0; } /// \brief Destructor. 
~APInt() { if (needsCleanup()) - delete[] pVal; + delete[] U.pVal; } /// \brief Default constructor that creates an uninteresting APInt @@ -321,7 +324,7 @@ public: /// /// This is useful for object deserialization (pair this with the static /// method Read). - explicit APInt() : VAL(0), BitWidth(1) {} + explicit APInt() : BitWidth(1) { U.VAL = 0; } /// \brief Returns whether this instance allocated memory. bool needsCleanup() const { return !isSingleWord(); } @@ -373,7 +376,7 @@ public: /// This checks to see if the value has all bits of the APInt are set or not. bool isAllOnesValue() const { if (isSingleWord()) - return VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth); + return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countPopulationSlowCase() == BitWidth; } @@ -428,7 +431,7 @@ public: /// \returns true if the argument APInt value is a power of two > 0. bool isPowerOf2() const { if (isSingleWord()) - return isPowerOf2_64(VAL); + return isPowerOf2_64(U.VAL); return countPopulationSlowCase() == 1; } @@ -461,7 +464,7 @@ public: assert(numBits != 0 && "numBits must be non-zero"); assert(numBits <= BitWidth && "numBits out of range"); if (isSingleWord()) - return VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits)); + return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits)); unsigned Ones = countTrailingOnesSlowCase(); return (numBits == Ones) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); @@ -472,7 +475,7 @@ public: /// Ex. isMask(0x0000FFFFU) == true. bool isMask() const { if (isSingleWord()) - return isMask_64(VAL); + return isMask_64(U.VAL); unsigned Ones = countTrailingOnesSlowCase(); return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); } @@ -481,7 +484,7 @@ public: /// the remainder zero. bool isShiftedMask() const { if (isSingleWord()) - return isShiftedMask_64(VAL); + return isShiftedMask_64(U.VAL); unsigned Ones = countPopulationSlowCase(); unsigned LeadZ = countLeadingZerosSlowCase(); return (Ones + LeadZ + countTrailingZeros()) == BitWidth; @@ -639,8 +642,8 @@ public: /// conversions. const uint64_t *getRawData() const { if (isSingleWord()) - return &VAL; - return &pVal[0]; + return &U.VAL; + return &U.pVal[0]; } /// @} @@ -686,7 +689,7 @@ public: /// \returns true if *this is zero, false otherwise. bool operator!() const { if (isSingleWord()) - return VAL == 0; + return U.VAL == 0; return countLeadingZerosSlowCase() == BitWidth; } @@ -700,7 +703,7 @@ public: APInt &operator=(const APInt &RHS) { // If the bitwidths are the same, we can avoid mucking with memory if (isSingleWord() && RHS.isSingleWord()) { - VAL = RHS.VAL; + U.VAL = RHS.U.VAL; BitWidth = RHS.BitWidth; return clearUnusedBits(); } @@ -713,11 +716,11 @@ public: APInt &operator=(APInt &&that) { assert(this != &that && "Self-move not supported"); if (!isSingleWord()) - delete[] pVal; + delete[] U.pVal; // Use memcpy so that type based alias analysis sees both VAL and pVal // as modified. - memcpy(&VAL, &that.VAL, sizeof(uint64_t)); + memcpy(&U, &that.U, sizeof(U)); BitWidth = that.BitWidth; that.BitWidth = 0; @@ -734,11 +737,11 @@ public: /// \returns *this after assignment of RHS value. 
APInt &operator=(uint64_t RHS) { if (isSingleWord()) { - VAL = RHS; + U.VAL = RHS; clearUnusedBits(); } else { - pVal[0] = RHS; - memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + U.pVal[0] = RHS; + memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); } return *this; } @@ -752,7 +755,7 @@ public: APInt &operator&=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL &= RHS.VAL; + U.VAL &= RHS.U.VAL; else AndAssignSlowCase(RHS); return *this; @@ -765,11 +768,11 @@ public: /// the LHS. APInt &operator&=(uint64_t RHS) { if (isSingleWord()) { - VAL &= RHS; + U.VAL &= RHS; return *this; } - pVal[0] &= RHS; - memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + U.pVal[0] &= RHS; + memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -782,7 +785,7 @@ public: APInt &operator|=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL |= RHS.VAL; + U.VAL |= RHS.U.VAL; else OrAssignSlowCase(RHS); return *this; @@ -795,10 +798,10 @@ public: /// the LHS. APInt &operator|=(uint64_t RHS) { if (isSingleWord()) { - VAL |= RHS; + U.VAL |= RHS; clearUnusedBits(); } else { - pVal[0] |= RHS; + U.pVal[0] |= RHS; } return *this; } @@ -812,7 +815,7 @@ public: APInt &operator^=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL ^= RHS.VAL; + U.VAL ^= RHS.U.VAL; else XorAssignSlowCase(RHS); return *this; @@ -825,10 +828,10 @@ public: /// the LHS. APInt &operator^=(uint64_t RHS) { if (isSingleWord()) { - VAL ^= RHS; + U.VAL ^= RHS; clearUnusedBits(); } else { - pVal[0] ^= RHS; + U.pVal[0] ^= RHS; } return *this; } @@ -865,9 +868,9 @@ public: assert(ShiftAmt <= BitWidth && "Invalid shift amount"); if (isSingleWord()) { if (ShiftAmt == BitWidth) - VAL = 0; + U.VAL = 0; else - VAL <<= ShiftAmt; + U.VAL <<= ShiftAmt; return clearUnusedBits(); } shlSlowCase(ShiftAmt); @@ -913,11 +916,11 @@ public: void ashrInPlace(unsigned ShiftAmt) { assert(ShiftAmt <= BitWidth && "Invalid shift amount"); if (isSingleWord()) { - int64_t SExtVAL = SignExtend64(VAL, BitWidth); + int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); if (ShiftAmt == BitWidth) - VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. + U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. else - VAL = SExtVAL >> ShiftAmt; + U.VAL = SExtVAL >> ShiftAmt; clearUnusedBits(); return; } @@ -938,9 +941,9 @@ public: assert(ShiftAmt <= BitWidth && "Invalid shift amount"); if (isSingleWord()) { if (ShiftAmt == BitWidth) - VAL = 0; + U.VAL = 0; else - VAL >>= ShiftAmt; + U.VAL >>= ShiftAmt; return; } lshrSlowCase(ShiftAmt); @@ -1059,7 +1062,7 @@ public: bool operator[](unsigned bitPosition) const { assert(bitPosition < getBitWidth() && "Bit position out of bounds!"); return (maskBit(bitPosition) & - (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != + (isSingleWord() ? 
U.VAL : U.pVal[whichWord(bitPosition)])) != 0; } @@ -1074,7 +1077,7 @@ public: bool operator==(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"); if (isSingleWord()) - return VAL == RHS.VAL; + return U.VAL == RHS.U.VAL; return EqualSlowCase(RHS); } @@ -1265,7 +1268,7 @@ public: bool intersects(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - return (VAL & RHS.VAL) != 0; + return (U.VAL & RHS.U.VAL) != 0; return intersectsSlowCase(RHS); } @@ -1273,7 +1276,7 @@ public: bool isSubsetOf(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - return (VAL & ~RHS.VAL) == 0; + return (U.VAL & ~RHS.U.VAL) == 0; return isSubsetOfSlowCase(RHS); } @@ -1333,10 +1336,10 @@ public: /// \brief Set every bit to 1. void setAllBits() { if (isSingleWord()) - VAL = WORD_MAX; + U.VAL = WORD_MAX; else // Set all the bits in all the words. - memset(pVal, -1, getNumWords() * APINT_WORD_SIZE); + memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); // Clear the unused ones clearUnusedBits(); } @@ -1348,9 +1351,9 @@ public: assert(BitPosition <= BitWidth && "BitPosition out of range"); WordType Mask = maskBit(BitPosition); if (isSingleWord()) - VAL |= Mask; + U.VAL |= Mask; else - pVal[whichWord(BitPosition)] |= Mask; + U.pVal[whichWord(BitPosition)] |= Mask; } /// Set the sign bit to 1. @@ -1369,9 +1372,9 @@ public: uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); mask <<= loBit; if (isSingleWord()) - VAL |= mask; + U.VAL |= mask; else - pVal[0] |= mask; + U.pVal[0] |= mask; } else { setBitsSlowCase(loBit, hiBit); } @@ -1395,9 +1398,9 @@ public: /// \brief Set every bit to 0. void clearAllBits() { if (isSingleWord()) - VAL = 0; + U.VAL = 0; else - memset(pVal, 0, getNumWords() * APINT_WORD_SIZE); + memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); } /// \brief Set a given bit to 0. @@ -1407,9 +1410,9 @@ public: assert(BitPosition <= BitWidth && "BitPosition out of range"); WordType Mask = ~maskBit(BitPosition); if (isSingleWord()) - VAL &= Mask; + U.VAL &= Mask; else - pVal[whichWord(BitPosition)] &= Mask; + U.pVal[whichWord(BitPosition)] &= Mask; } /// Set the sign bit to 0. @@ -1420,7 +1423,7 @@ public: /// \brief Toggle every bit to its opposite value. void flipAllBits() { if (isSingleWord()) { - VAL ^= WORD_MAX; + U.VAL ^= WORD_MAX; clearUnusedBits(); } else { flipAllBitsSlowCase(); @@ -1500,9 +1503,9 @@ public: /// uint64_t. Otherwise an assertion will result. uint64_t getZExtValue() const { if (isSingleWord()) - return VAL; + return U.VAL; assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); - return pVal[0]; + return U.pVal[0]; } /// \brief Get sign extended value @@ -1512,9 +1515,9 @@ public: /// int64_t. Otherwise an assertion will result. int64_t getSExtValue() const { if (isSingleWord()) - return SignExtend64(VAL, BitWidth); + return SignExtend64(U.VAL, BitWidth); assert(getMinSignedBits() <= 64 && "Too many bits for int64_t"); - return int64_t(pVal[0]); + return int64_t(U.pVal[0]); } /// \brief Get bits required for string value. 
@@ -1534,7 +1537,7 @@ public: unsigned countLeadingZeros() const { if (isSingleWord()) { unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; - return llvm::countLeadingZeros(VAL) - unusedBits; + return llvm::countLeadingZeros(U.VAL) - unusedBits; } return countLeadingZerosSlowCase(); } @@ -1575,7 +1578,7 @@ public: /// of ones from the least significant bit to the first zero bit. unsigned countTrailingOnes() const { if (isSingleWord()) - return llvm::countTrailingOnes(VAL); + return llvm::countTrailingOnes(U.VAL); return countTrailingOnesSlowCase(); } @@ -1587,7 +1590,7 @@ public: /// \returns 0 if the value is zero, otherwise returns the number of set bits. unsigned countPopulation() const { if (isSingleWord()) - return llvm::countPopulation(VAL); + return llvm::countPopulation(U.VAL); return countPopulationSlowCase(); } @@ -1646,7 +1649,7 @@ public: uint64_t I; double D; } T; - T.I = (isSingleWord() ? VAL : pVal[0]); + T.I = (isSingleWord() ? U.VAL : U.pVal[0]); return T.D; } @@ -1660,7 +1663,7 @@ public: unsigned I; float F; } T; - T.I = unsigned((isSingleWord() ? VAL : pVal[0])); + T.I = unsigned((isSingleWord() ? U.VAL : U.pVal[0])); return T.F; } @@ -1718,7 +1721,7 @@ public: // get 0. If VAL is 0, we get WORD_MAX which gets truncated to // UINT32_MAX. if (BitWidth == 1) - return VAL - 1; + return U.VAL - 1; // Handle the zero case. if (isNullValue()) diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 317a5d3f54c8..0d898827efc6 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -346,29 +346,21 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { for (auto &F : M) { auto &Ctx = F.getContext(); - AttributeList Attrs = F.getAttributes(), NewAttrs; + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; if (!CPU.empty()) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "target-cpu", CPU); - + NewAttrs.addAttribute("target-cpu", CPU); if (!Features.empty()) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "target-features", Features); - + NewAttrs.addAttribute("target-features", Features); if (DisableFPElim.getNumOccurrences() > 0) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "no-frame-pointer-elim", - DisableFPElim ? "true" : "false"); - + NewAttrs.addAttribute("no-frame-pointer-elim", + DisableFPElim ? "true" : "false"); if (DisableTailCalls.getNumOccurrences() > 0) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "disable-tail-calls", - toStringRef(DisableTailCalls)); - + NewAttrs.addAttribute("disable-tail-calls", + toStringRef(DisableTailCalls)); if (StackRealign) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "stackrealign"); + NewAttrs.addAttribute("stackrealign"); if (TrapFuncName.getNumOccurrences() > 0) for (auto &B : F) @@ -382,8 +374,8 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, Attribute::get(Ctx, "trap-func-name", TrapFuncName)); // Let NewAttrs override Attrs. 
- NewAttrs = Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs); - F.setAttributes(NewAttrs); + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); } } diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h index 086d6dff11c5..ac8aaafeadc1 100644 --- a/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -53,7 +53,7 @@ struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> { typedef void ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::CVRecord<Kind> &Item, void *Ctx) { + codeview::CVRecord<Kind> &Item) { using namespace codeview; const RecordPrefix *Prefix = nullptr; BinaryStreamReader Reader(Stream); diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h index a5a3b851b841..6c08c9aa2137 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h @@ -21,6 +21,8 @@ namespace llvm { namespace codeview { +class StringTable; + struct FileChecksumEntry { uint32_t FileNameOffset; // Byte offset of filename in global stringtable. FileChecksumKind Kind; // The type of checksum. @@ -35,7 +37,7 @@ public: typedef void ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::FileChecksumEntry &Item, void *Ctx); + codeview::FileChecksumEntry &Item); }; } @@ -55,8 +57,8 @@ public: Error initialize(BinaryStreamReader Reader); - Iterator begin() const { return Checksums.begin(); } - Iterator end() const { return Checksums.end(); } + Iterator begin() { return Checksums.begin(); } + Iterator end() { return Checksums.end(); } const FileChecksumArray &getArray() const { return Checksums; } @@ -66,20 +68,22 @@ private: class ModuleDebugFileChecksumFragment final : public ModuleDebugFragment { public: - ModuleDebugFileChecksumFragment(); + explicit ModuleDebugFileChecksumFragment(StringTable &Strings); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::FileChecksums; } - void addChecksum(uint32_t StringTableOffset, FileChecksumKind Kind, + void addChecksum(StringRef FileName, FileChecksumKind Kind, ArrayRef<uint8_t> Bytes); uint32_t calculateSerializedLength() override; Error commit(BinaryStreamWriter &Writer) override; - uint32_t mapChecksumOffset(uint32_t StringTableOffset) const; + uint32_t mapChecksumOffset(StringRef FileName) const; private: + StringTable &Strings; + DenseMap<uint32_t, uint32_t> OffsetMap; uint32_t SerializedSize = 0; llvm::BumpPtrAllocator Storage; diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h index b98c8605592c..f68f21b224f1 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h @@ -57,8 +57,6 @@ private: ModuleDebugFragment &Frag; }; -typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray; - } // namespace codeview template <> @@ -66,13 +64,17 @@ struct VarStreamArrayExtractor<codeview::ModuleDebugFragmentRecord> { typedef void ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Length, - codeview::ModuleDebugFragmentRecord &Info, void *Ctx) { + codeview::ModuleDebugFragmentRecord &Info) { if (auto EC = 
codeview::ModuleDebugFragmentRecord::initialize(Stream, Info)) return EC; Length = Info.getRecordLength(); return Error::success(); } }; + +namespace codeview { +typedef VarStreamArray<ModuleDebugFragmentRecord> ModuleDebugFragmentArray; +} } // namespace llvm #endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h index 177367c111c3..348497cbf7f2 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h @@ -20,6 +20,8 @@ namespace llvm { namespace codeview { class ModuleDebugInlineeLineFragmentRef; +class ModuleDebugFileChecksumFragment; +class StringTable; enum class InlineeLinesSignature : uint32_t { Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE @@ -42,11 +44,10 @@ struct InlineeSourceLine { } template <> struct VarStreamArrayExtractor<codeview::InlineeSourceLine> { - typedef codeview::ModuleDebugInlineeLineFragmentRef ContextType; + typedef bool ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Len, - codeview::InlineeSourceLine &Item, - ContextType *Fragment); + codeview::InlineeSourceLine &Item, bool HasExtraFiles); }; namespace codeview { @@ -74,7 +75,8 @@ private: class ModuleDebugInlineeLineFragment final : public ModuleDebugFragment { public: - explicit ModuleDebugInlineeLineFragment(bool HasExtraFiles); + ModuleDebugInlineeLineFragment(ModuleDebugFileChecksumFragment &Checksums, + bool HasExtraFiles); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::InlineeLines; @@ -83,11 +85,12 @@ public: Error commit(BinaryStreamWriter &Writer) override; uint32_t calculateSerializedLength() override; - void addInlineSite(TypeIndex FuncId, uint32_t FileOffset, - uint32_t SourceLine); - void addExtraFile(uint32_t FileOffset); + void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine); + void addExtraFile(StringRef FileName); private: + ModuleDebugFileChecksumFragment &Checksums; + bool HasExtraFiles = false; uint32_t ExtraFileCount = 0; diff --git a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h index dcfe86dd8503..3124236b8fb1 100644 --- a/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h +++ b/include/llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h @@ -19,6 +19,9 @@ namespace llvm { namespace codeview { +class ModuleDebugFileChecksumFragment; +class StringTable; + // Corresponds to the `CV_DebugSLinesHeader_t` structure. struct LineFragmentHeader { support::ulittle32_t RelocOffset; // Code offset of line contribution. 
@@ -61,10 +64,10 @@ struct LineColumnEntry { class LineColumnExtractor { public: - typedef const LineFragmentHeader ContextType; + typedef const LineFragmentHeader *ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Len, - LineColumnEntry &Item, const LineFragmentHeader *Header); + LineColumnEntry &Item, const LineFragmentHeader *Ctx); }; class ModuleDebugLineFragmentRef final : public ModuleDebugFragmentRef { @@ -104,13 +107,14 @@ class ModuleDebugLineFragment final : public ModuleDebugFragment { }; public: - ModuleDebugLineFragment(); + ModuleDebugLineFragment(ModuleDebugFileChecksumFragment &Checksums, + StringTable &Strings); static bool classof(const ModuleDebugFragment *S) { return S->kind() == ModuleDebugFragmentKind::Lines; } - void createBlock(uint32_t ChecksumBufferOffset); + void createBlock(StringRef FileName); void addLineInfo(uint32_t Offset, const LineInfo &Line); void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line, uint32_t ColStart, uint32_t ColEnd); @@ -125,6 +129,8 @@ public: bool hasColumnInfo() const; private: + ModuleDebugFileChecksumFragment &Checksums; + uint16_t RelocOffset = 0; uint16_t RelocSegment = 0; uint32_t CodeSize = 0; diff --git a/include/llvm/DebugInfo/CodeView/StringTable.h b/include/llvm/DebugInfo/CodeView/StringTable.h new file mode 100644 index 000000000000..05dc02ee849f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/StringTable.h @@ -0,0 +1,75 @@ +//===- StringTable.h - CodeView String Table Reader/Writer ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H +#define LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" + +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" + +#include <stdint.h> + +namespace llvm { + +class BinaryStreamReader; +class BinaryStreamRef; +class BinaryStreamWriter; + +namespace codeview { + +/// Represents a read-only view of a CodeView string table. This is a very +/// simple flat buffer consisting of null-terminated strings, where strings +/// are retrieved by their offset in the buffer. StringTableRef does not own +/// the underlying storage for the buffer. +class StringTableRef { +public: + StringTableRef(); + + Error initialize(BinaryStreamRef Contents); + + Expected<StringRef> getString(uint32_t Offset) const; + + bool valid() const { return Stream.valid(); } + +private: + BinaryStreamRef Stream; +}; + +/// Represents a read-write view of a CodeView string table. StringTable owns +/// the underlying storage for the table, and is capable of serializing the +/// string table into a format understood by StringTableRef. +class StringTable { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + // Return the ID for string S. Assumes S exists in the table. 
+ uint32_t getStringId(StringRef S) const; + + uint32_t calculateSerializedSize() const; + Error commit(BinaryStreamWriter &Writer) const; + + uint32_t size() const; + + StringMap<uint32_t>::const_iterator begin() const { return Strings.begin(); } + + StringMap<uint32_t>::const_iterator end() const { return Strings.end(); } + +private: + StringMap<uint32_t> Strings; + uint32_t StringSize = 1; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h index 2bef3f61adfc..96c8a47a3669 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h +++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h @@ -19,13 +19,15 @@ class BinaryStreamReader; namespace codeview { +class StringTableRef; + class SymbolVisitorDelegate { public: virtual ~SymbolVisitorDelegate() = default; virtual uint32_t getRecordOffset(BinaryStreamReader Reader) = 0; virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0; - virtual StringRef getStringTable() = 0; + virtual StringTableRef getStringTable() = 0; }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 3c04c6716ea3..b9f3425d5deb 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -172,6 +172,9 @@ public: return DWOCUs[index].get(); } + /// Get a DIE given an exact offset. + DWARFDie getDIEForOffset(uint32_t Offset); + const DWARFUnitIndex &getCUIndex(); DWARFGdbIndex &getGdbIndex(); const DWARFUnitIndex &getTUIndex(); diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index dd0e2648bf30..e21245b97b73 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -30,7 +30,7 @@ public: struct FileNameEntry { FileNameEntry() = default; - const char *Name = nullptr; + StringRef Name = StringRef(); uint64_t DirIdx = 0; uint64_t ModTime = 0; uint64_t Length = 0; @@ -44,6 +44,10 @@ public: uint64_t TotalLength; /// Version identifier for the statement information format. uint16_t Version; + /// In v5, size in bytes of an address (or segment offset). + uint8_t AddressSize; + /// In v5, size in bytes of a segment selector. + uint8_t SegSelectorSize; /// The number of bytes following the prologue_length field to the beginning /// of the first byte of the statement program itself. uint64_t PrologueLength; @@ -63,7 +67,7 @@ public: /// The number assigned to the first special opcode. 
uint8_t OpcodeBase; std::vector<uint8_t> StandardOpcodeLengths; - std::vector<const char *> IncludeDirectories; + std::vector<StringRef> IncludeDirectories; std::vector<FileNameEntry> FileNames; bool IsDWARF64; @@ -100,7 +104,7 @@ public: void postAppend(); void reset(bool DefaultIsStmt); void dump(raw_ostream &OS) const; - + static void dumpTableHeader(raw_ostream &OS); static bool orderByAddress(const Row &LHS, const Row &RHS) { return LHS.Address < RHS.Address; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index e29ba523238c..68e541bac73c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -312,9 +312,9 @@ public: [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) { return LHS.getOffset() < Offset; }); - if (it == DieArray.end()) - return DWARFDie(); - return DWARFDie(this, &*it); + if (it != DieArray.end() && it->getOffset() == Offset) + return DWARFDie(this, &*it); + return DWARFDie(); } uint32_t getLineTableOffset() const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h new file mode 100644 index 000000000000..8e12bcd2c8e2 --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -0,0 +1,98 @@ +//===- DWARFVerifier.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H +#define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H + +#include <cstdint> +#include <map> +#include <set> + +namespace llvm { +class raw_ostream; +struct DWARFAttribute; +class DWARFContext; +class DWARFDie; +class DWARFUnit; + +/// A class that verifies DWARF debug information given a DWARF Context. +class DWARFVerifier { + raw_ostream &OS; + DWARFContext &DCtx; + /// A map that tracks all references (converted absolute references) so we + /// can verify each reference points to a valid DIE and not an offset that + /// lies between to valid DIEs. + std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; + uint32_t NumDebugInfoErrors; + uint32_t NumDebugLineErrors; + + /// Verifies the attribute's DWARF attribute and its value. + /// + /// This function currently checks for: + /// - DW_AT_ranges values is a valid .debug_ranges offset + /// - DW_AT_stmt_list is a valid .debug_line offset + /// + /// @param Die The DWARF DIE that owns the attribute value + /// @param AttrValue The DWARF attribute value to check + void verifyDebugInfoAttribute(DWARFDie &Die, DWARFAttribute &AttrValue); + + /// Verifies the attribute's DWARF form. + /// + /// This function currently checks for: + /// - All DW_FORM_ref values that are CU relative have valid CU offsets + /// - All DW_FORM_ref_addr values have valid .debug_info offsets + /// - All DW_FORM_strp values have valid .debug_str offsets + /// + /// @param Die The DWARF DIE that owns the attribute value + /// @param AttrValue The DWARF attribute value to check + void verifyDebugInfoForm(DWARFDie &Die, DWARFAttribute &AttrValue); + + /// Verifies the all valid references that were found when iterating through + /// all of the DIE attributes. + /// + /// This function will verify that all references point to DIEs whose DIE + /// offset matches. 
This helps to ensure if a DWARF link phase moved things + /// around, that it doesn't create invalid references by failing to relocate + /// CU relative and absolute references. + void veifyDebugInfoReferences(); + + /// Verify the the DW_AT_stmt_list encoding and value and ensure that no + /// compile units that have the same DW_AT_stmt_list value. + void verifyDebugLineStmtOffsets(); + + /// Verify that all of the rows in the line table are valid. + /// + /// This function currently checks for: + /// - addresses within a sequence that decrease in value + /// - invalid file indexes + void verifyDebugLineRows(); + +public: + DWARFVerifier(raw_ostream &S, DWARFContext &D) + : OS(S), DCtx(D), NumDebugInfoErrors(0), NumDebugLineErrors(0) {} + /// Verify the information in the .debug_info section. + /// + /// Any errors are reported to the stream that was this object was + /// constructed with. + /// + /// @return True if the .debug_info verifies successfully, false otherwise. + bool handleDebugInfo(); + + /// Verify the information in the .debug_line section. + /// + /// Any errors are reported to the stream that was this object was + /// constructed with. + /// + /// @return True if the .debug_line verifies successfully, false otherwise. + bool handleDebugLine(); +}; + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index 879cb4285cd7..d1f791b9daed 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -66,7 +66,7 @@ struct ModuleInfoEx { template <> struct VarStreamArrayExtractor<pdb::DbiModuleDescriptor> { typedef void ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Length, - pdb::DbiModuleDescriptor &Info, void *Ctx) { + pdb::DbiModuleDescriptor &Info) { if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info)) return EC; Length = Info.getRecordLength(); diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 84ae57f2e23a..08262e47f77f 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -13,9 +13,9 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugFragment.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamArray.h" @@ -91,7 +91,7 @@ private: std::unique_ptr<msf::MappedBlockStream> Stream; std::vector<ModuleInfoEx> ModuleInfos; - StringTable ECNames; + PDBStringTable ECNames; BinaryStreamRef ModInfoSubstream; BinaryStreamRef SecContrSubstream; diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/include/llvm/DebugInfo/PDB/Native/PDBFile.h index fbca62d6e9d9..3bed67141c56 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBFile.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBFile.h @@ -33,7 +33,7 @@ namespace pdb { class DbiStream; class GlobalsStream; class InfoStream; -class StringTable; +class PDBStringTable; class PDBFileBuilder; class PublicsStream; class SymbolStream; @@ -95,7 +95,7 @@ public: Expected<TpiStream &> getPDBIpiStream(); 
Expected<PublicsStream &> getPDBPublicsStream(); Expected<SymbolStream &> getPDBSymbolStream(); - Expected<StringTable &> getStringTable(); + Expected<PDBStringTable &> getStringTable(); BumpPtrAllocator &getAllocator() { return Allocator; } @@ -106,7 +106,7 @@ public: bool hasPDBPublicsStream(); bool hasPDBSymbolStream(); bool hasPDBTpiStream() const; - bool hasStringTable(); + bool hasPDBStringTable(); private: Expected<std::unique_ptr<msf::MappedBlockStream>> @@ -131,7 +131,7 @@ private: std::unique_ptr<SymbolStream> Symbols; std::unique_ptr<msf::MappedBlockStream> DirectoryStream; std::unique_ptr<msf::MappedBlockStream> StringTableStream; - std::unique_ptr<StringTable> Strings; + std::unique_ptr<PDBStringTable> Strings; }; } } diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 3898af5afc9e..cd7d3b063793 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -15,8 +15,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -46,12 +46,14 @@ public: DbiStreamBuilder &getDbiBuilder(); TpiStreamBuilder &getTpiBuilder(); TpiStreamBuilder &getIpiBuilder(); - StringTableBuilder &getStringTableBuilder(); + PDBStringTableBuilder &getStringTableBuilder(); Error commit(StringRef Filename); -private: + Expected<uint32_t> getNamedStreamIndex(StringRef Name) const; Error addNamedStream(StringRef Name, uint32_t Size); + +private: Expected<msf::MSFLayout> finalizeMsfLayout(); BumpPtrAllocator &Allocator; @@ -62,7 +64,7 @@ private: std::unique_ptr<TpiStreamBuilder> Tpi; std::unique_ptr<TpiStreamBuilder> Ipi; - StringTableBuilder Strings; + PDBStringTableBuilder Strings; NamedStreamMap NamedStreams; }; } diff --git a/include/llvm/DebugInfo/PDB/Native/StringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h index dd5e30e61827..7c7f16bd1c73 100644 --- a/include/llvm/DebugInfo/PDB/Native/StringTable.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -1,4 +1,4 @@ -//===- StringTable.h - PDB String Table -------------------------*- C++ -*-===// +//===- PDBStringTable.h - PDB String Table -----------------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H -#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -22,31 +23,38 @@ namespace llvm { class BinaryStreamReader; +namespace msf { +class MappedBlockStream; +} + namespace pdb { -class StringTable { -public: - StringTable(); +struct PDBStringTableHeader; - Error load(BinaryStreamReader &Stream); +class PDBStringTable { +public: + Error reload(BinaryStreamReader &Reader); uint32_t getByteSize() const; + uint32_t getNameCount() const; + uint32_t getHashVersion() const; + 
uint32_t getSignature() const; - uint32_t getNameCount() const { return NameCount; } - uint32_t getHashVersion() const { return HashVersion; } - uint32_t getSignature() const { return Signature; } - - StringRef getStringForID(uint32_t ID) const; - uint32_t getIDForString(StringRef Str) const; + Expected<StringRef> getStringForID(uint32_t ID) const; + Expected<uint32_t> getIDForString(StringRef Str) const; FixedStreamArray<support::ulittle32_t> name_ids() const; private: - BinaryStreamRef NamesBuffer; + Error readHeader(BinaryStreamReader &Reader); + Error readStrings(BinaryStreamReader &Reader); + Error readHashTable(BinaryStreamReader &Reader); + Error readEpilogue(BinaryStreamReader &Reader); + + const PDBStringTableHeader *Header = nullptr; + codeview::StringTableRef Strings; FixedStreamArray<support::ulittle32_t> IDs; uint32_t ByteSize = 0; - uint32_t Signature = 0; - uint32_t HashVersion = 0; uint32_t NameCount = 0; }; diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h new file mode 100644 index 000000000000..6f85e7a4a074 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h @@ -0,0 +1,60 @@ +//===- PDBStringTableBuilder.h - PDB String Table Builder -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file creates the "/names" stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" +#include "llvm/Support/Error.h" +#include <vector> + +namespace llvm { +class BinaryStreamWriter; +class WritableBinaryStreamRef; + +namespace msf { +struct MSFLayout; +} + +namespace pdb { + +class PDBFileBuilder; + +class PDBStringTableBuilder { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + uint32_t calculateSerializedSize() const; + Error commit(BinaryStreamWriter &Writer) const; + + codeview::StringTable &getStrings() { return Strings; } + const codeview::StringTable &getStrings() const { return Strings; } + +private: + uint32_t calculateHashTableSize() const; + Error writeHeader(BinaryStreamWriter &Writer) const; + Error writeStrings(BinaryStreamWriter &Writer) const; + Error writeHashTable(BinaryStreamWriter &Writer) const; + Error writeEpilogue(BinaryStreamWriter &Writer) const; + + codeview::StringTable Strings; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h index e1c6cf0021d5..93622d0a4394 100644 --- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -307,13 +307,13 @@ struct InfoStreamHeader { }; /// The header preceeding the /names stream. 
-struct StringTableHeader { - support::ulittle32_t Signature; - support::ulittle32_t HashVersion; - support::ulittle32_t ByteSize; +struct PDBStringTableHeader { + support::ulittle32_t Signature; // PDBStringTableSignature + support::ulittle32_t HashVersion; // 1 or 2 + support::ulittle32_t ByteSize; // Number of bytes of names buffer. }; -const uint32_t StringTableSignature = 0xEFFEEFFE; +const uint32_t PDBStringTableSignature = 0xEFFEEFFE; } // namespace pdb } // namespace llvm diff --git a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h deleted file mode 100644 index 9c4b12e33ba0..000000000000 --- a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h +++ /dev/null @@ -1,45 +0,0 @@ -//===- StringTableBuilder.h - PDB String Table Builder ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file creates the "/names" stream. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H -#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" -#include <vector> - -namespace llvm { -class BinaryStreamWriter; - -namespace pdb { - -class StringTableBuilder { -public: - // If string S does not exist in the string table, insert it. - // Returns the ID for S. - uint32_t insert(StringRef S); - uint32_t getStringIndex(StringRef S); - - uint32_t finalize(); - Error commit(BinaryStreamWriter &Writer) const; - -private: - DenseMap<StringRef, uint32_t> Strings; - uint32_t StringSize = 1; -}; - -} // end namespace pdb -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index af46034d5a9e..adcb7266073b 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -285,7 +285,8 @@ class AttributeList { public: enum AttrIndex : unsigned { ReturnIndex = 0U, - FunctionIndex = ~0U + FunctionIndex = ~0U, + FirstArgIndex = 1, }; private: @@ -336,6 +337,13 @@ public: static AttributeList get(LLVMContext &C, unsigned Index, const AttrBuilder &B); + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, + Attribute::AttrKind Kind) const { + return addAttribute(C, ArgNo + FirstArgIndex, Kind); + } + /// \brief Add an attribute to the attribute set at the given index. Because /// attribute sets are immutable, this returns a new set. AttributeList addAttribute(LLVMContext &C, unsigned Index, @@ -354,9 +362,6 @@ public: /// \brief Add attributes to the attribute set at the given index. Because /// attribute sets are immutable, this returns a new set. AttributeList addAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const; - - AttributeList addAttributes(LLVMContext &C, unsigned Index, const AttrBuilder &B) const; /// \brief Remove the specified attribute at the specified index from this @@ -375,13 +380,7 @@ public: /// attribute list. Because attribute lists are immutable, this returns the /// new list. 
AttributeList removeAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const; - - /// \brief Remove the specified attributes at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. - AttributeList removeAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &Attrs) const; + const AttrBuilder &AttrsToRemove) const; /// \brief Remove all attributes at the specified index from this /// attribute list. Because attribute lists are immutable, this returns the @@ -442,7 +441,7 @@ public: /// may be faster. bool hasFnAttribute(StringRef Kind) const; - /// \brief Equivalent to hasAttribute(ArgNo + 1, Kind). + /// \brief Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind). bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const; /// \brief Return true if the specified attribute is set for at least one diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index bad1d4e383d5..d61431a51a97 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -339,6 +339,10 @@ public: CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr)); } + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind)); + } + void removeAttribute(unsigned i, Attribute::AttrKind Kind) { CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); } @@ -347,6 +351,10 @@ public: CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); } + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind)); + } + /// Return true if this function has the given attribute. bool hasFnAttr(Attribute::AttrKind Kind) const { CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); @@ -408,11 +416,9 @@ public: CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); } - /// Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - CALLSITE_DELEGATE_GETTER(doesNotAlias(n)); + /// Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + CALLSITE_DELEGATE_GETTER(returnDoesNotAlias()); } /// Return true if the call should not be treated as a call to a builtin. diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 9e723f977755..f9582f51ca8d 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -204,6 +204,10 @@ public: addAttribute(AttributeList::FunctionIndex, Attr); } + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + } + /// @brief Remove function attributes from this function. void removeFnAttr(Attribute::AttrKind Kind) { removeAttribute(AttributeList::FunctionIndex, Kind); @@ -211,10 +215,14 @@ public: /// @brief Remove function attribute from this function. void removeFnAttr(StringRef Kind) { - setAttributes(AttributeSets.removeAttribute( + setAttributes(getAttributes().removeAttribute( getContext(), AttributeList::FunctionIndex, Kind)); } + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); + } + /// \brief Set the entry count for this function. 
/// /// Entry count is the number of times this function was executed based on @@ -279,7 +287,7 @@ public: void addAttribute(unsigned i, Attribute Attr); /// @brief adds the attributes to the list of attributes. - void addAttributes(unsigned i, AttributeList Attrs); + void addAttributes(unsigned i, const AttrBuilder &Attrs); /// @brief removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); @@ -288,7 +296,7 @@ public: void removeAttribute(unsigned i, StringRef Kind); /// @brief removes the attributes from the list of attributes. - void removeAttributes(unsigned i, AttributeList Attrs); + void removeAttributes(unsigned i, const AttrBuilder &Attrs); /// @brief check if an attributes is in the list of attributes. bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const { @@ -459,35 +467,12 @@ public: /// @brief Determine if the parameter or return value is marked with NoAlias /// attribute. /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::NoAlias); - } - void setDoesNotAlias(unsigned n) { - addAttribute(n, Attribute::NoAlias); - } - - /// @brief Determine if the parameter can be captured. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotCapture(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::NoCapture); - } - void setDoesNotCapture(unsigned n) { - addAttribute(n, Attribute::NoCapture); - } - - bool doesNotAccessMemory(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::ReadNone); - } - void setDoesNotAccessMemory(unsigned n) { - addAttribute(n, Attribute::ReadNone); - } - - bool onlyReadsMemory(unsigned n) const { - return doesNotAccessMemory(n) || - AttributeSets.hasAttribute(n, Attribute::ReadOnly); + bool returnDoesNotAlias() const { + return AttributeSets.hasAttribute(AttributeList::ReturnIndex, + Attribute::NoAlias); } - void setOnlyReadsMemory(unsigned n) { - addAttribute(n, Attribute::ReadOnly); + void setReturnDoesNotAlias() { + addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Optimize this function for minimum size (-Oz). diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 4d3f1dc267f2..844a7273eca9 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1658,12 +1658,18 @@ public: /// adds the attribute to the list of attributes. void addAttribute(unsigned i, Attribute Attr); + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, StringRef Kind); + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); @@ -1734,11 +1740,9 @@ public: return Attrs.getDereferenceableOrNullBytes(i); } - /// @brief Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 
1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return Attrs.hasAttribute(n, Attribute::NoAlias); + /// @brief Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Return true if the call should not be treated as a call to a @@ -3750,12 +3754,18 @@ public: /// adds the attribute to the list of attributes. void addAttribute(unsigned i, Attribute Attr); + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, StringRef Kind); + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); @@ -3827,11 +3837,9 @@ public: return Attrs.getDereferenceableOrNullBytes(i); } - /// @brief Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return Attrs.hasAttribute(n, Attribute::NoAlias); + /// @brief Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Return true if the call should not be treated as a call to a diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h index 2f6bdf8ecf19..fc79da7ae0e6 100644 --- a/include/llvm/IR/Intrinsics.h +++ b/include/llvm/IR/Intrinsics.h @@ -100,7 +100,7 @@ namespace Intrinsic { Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, - SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt + SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt } Kind; union { @@ -119,25 +119,43 @@ namespace Intrinsic { AK_AnyVector, AK_AnyPointer }; + unsigned getArgumentNumber() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == PtrToElt || Kind == VecOfPtrsToElt); + Kind == PtrToElt); return Argument_Info >> 3; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || - Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == VecOfPtrsToElt); + Kind == SameVecWidthArgument || Kind == PtrToArgument); return (ArgKind)(Argument_Info & 7); } + // VecOfAnyPtrsToElt uses both an overloaded argument (for address space) + // and a reference argument (for matching vector width and element types) + unsigned getOverloadArgNumber() const { + assert(Kind == VecOfAnyPtrsToElt); + return Argument_Info >> 16; + } + unsigned getRefArgNumber() const { + assert(Kind == VecOfAnyPtrsToElt); + return Argument_Info & 0xFFFF; + } + static IITDescriptor get(IITDescriptorKind K, unsigned Field) { IITDescriptor Result = { K, { Field } }; return Result; } + + static IITDescriptor get(IITDescriptorKind K, unsigned short Hi, + unsigned short Lo) { + unsigned Field = Hi << 16 | Lo; + 
IITDescriptor Result = {K, {Field}}; + return Result; + } }; /// Return the IIT table descriptor for the specified intrinsic into an array diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 39b992cd06a8..cf7e5d8758a9 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -155,7 +155,7 @@ class LLVMVectorSameWidth<int num, LLVMType elty> } class LLVMPointerTo<int num> : LLVMMatchType<num>; class LLVMPointerToElt<int num> : LLVMMatchType<num>; -class LLVMVectorOfPointersToElt<int num> : LLVMMatchType<num>; +class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>; // Match the type of another intrinsic parameter that is expected to be a // vector type, but change the element count to be half as many @@ -404,7 +404,7 @@ def int_memset : Intrinsic<[], // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -440,10 +440,12 @@ let IntrProperties = [IntrNoMem] in { } def int_minnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; // NOTE: these are internal interfaces. @@ -455,7 +457,7 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>; // Internal interface for object size checking def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem]>, + [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// @@ -500,7 +502,7 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, // // None of these intrinsics accesses memory at all. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; @@ -511,10 +513,11 @@ let IntrProperties = [IntrNoMem] in { //===------------------------ Debugger Intrinsics -------------------------===// // -// None of these intrinsics accesses memory at all...but that doesn't mean the -// optimizers can change them aggressively. Special handling needed in a few -// places. -let IntrProperties = [IntrNoMem] in { +// None of these intrinsics accesses memory at all...but that doesn't +// mean the optimizers can change them aggressively. Special handling +// needed in a few places. These synthetic intrinsics have no +// side-effects and just mark information about their operands. +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, @@ -592,24 +595,24 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], // Expose the carry flag from add operations on two integrals. 
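Each arithmetic-with-overflow definition that follows returns a {result, i1 overflow-flag} pair and, with this change, also carries IntrSpeculatable. A hedged sketch of emitting one of them from C++ (M, Builder, LHS and RHS are assumed to exist; this is illustration, not code from the patch):

  Function *SAddO = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
                                              {Builder.getInt32Ty()});
  CallInst *Pair = Builder.CreateCall(SAddO, {LHS, RHS});
  Value *Sum = Builder.CreateExtractValue(Pair, 0);      // the i32 result
  Value *Overflow = Builder.CreateExtractValue(Pair, 1); // the i1 overflow flag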
def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; //===------------------------- Memory Use Markers -------------------------===// // @@ -633,7 +636,7 @@ def int_invariant_end : Intrinsic<[], // it can be CSE only if memory didn't change between 2 barriers call, // which is valid. // The argument also can't be marked with 'returned' attribute, because -// it would remove barrier. +// it would remove barrier. def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; @@ -758,14 +761,14 @@ def int_masked_load : Intrinsic<[llvm_anyvector_ty], [IntrReadMem, IntrArgMemOnly]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], - [LLVMVectorOfPointersToElt<0>, llvm_i32_ty, + [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrReadMem]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, - LLVMVectorOfPointersToElt<0>, llvm_i32_ty, + LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 97c756cf4b60..1c466e73eb1b 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3221,6 +3221,29 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// +// LWP +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_llwpcb : + GCCBuiltin<"__builtin_ia32_llwpcb">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_slwpcb : + GCCBuiltin<"__builtin_ia32_slwpcb">, + Intrinsic<[llvm_ptr_ty], [], []>; + def int_x86_lwpins32 : + GCCBuiltin<"__builtin_ia32_lwpins32">, + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpins64 : + GCCBuiltin<"__builtin_ia32_lwpins64">, + Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpval32 : + GCCBuiltin<"__builtin_ia32_lwpval32">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpval64 : + GCCBuiltin<"__builtin_ia32_lwpval64">, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +} + +//===----------------------------------------------------------------------===// // MMX // Empty MMX state op. 
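Because int_masked_gather and int_masked_scatter above now describe their pointer operand with LLVMVectorOfAnyPointersToElt, the pointer vector becomes a second overloaded type: C++ code that materializes the declaration has to name both vector types, which is what lets a gather address vectors in non-default address spaces. A hedged sketch (M is an existing Module; the element count and address space are made up):

  LLVMContext &Ctx = M->getContext();
  Type *DblTy = Type::getDoubleTy(Ctx);
  Type *DataTy = VectorType::get(DblTy, 4);                         // <4 x double>
  Type *PtrsTy = VectorType::get(DblTy->getPointerTo(/*AS=*/1), 4); // <4 x double addrspace(1)*>
  Function *Gather =
      Intrinsic::getDeclaration(M, Intrinsic::masked_gather, {DataTy, PtrsTy});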
diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 748a62be231e..f141c30f16c7 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -42,99 +42,34 @@ namespace llvm { /// having to specify a second template argument to VarStreamArray (documented /// below). template <typename T> struct VarStreamArrayExtractor { - typedef void Context; + struct ContextType {}; // Method intentionally deleted. You must provide an explicit specialization - // with the following method implemented. - static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item, - Context *Ctx) = delete; -}; - -/// VarStreamArray represents an array of variable length records backed by a -/// stream. This could be a contiguous sequence of bytes in memory, it could -/// be a file on disk, or it could be a PDB stream where bytes are stored as -/// discontiguous blocks in a file. Usually it is desirable to treat arrays -/// as contiguous blocks of memory, but doing so with large PDB files, for -/// example, could mean allocating huge amounts of memory just to allow -/// re-ordering of stream data to be contiguous before iterating over it. By -/// abstracting this out, we need not duplicate this memory, and we can -/// iterate over arrays in arbitrarily formatted streams. Elements are parsed -/// lazily on iteration, so there is no upfront cost associated with building -/// or copying a VarStreamArray, no matter how large it may be. -/// -/// You create a VarStreamArray by specifying a ValueType and an Extractor type. -/// If you do not specify an Extractor type, you are expected to specialize -/// VarStreamArrayExtractor<T> for your ValueType. -/// -/// The default extractor type is stateless, but by specializing -/// VarStreamArrayExtractor or defining your own custom extractor type and -/// adding the appropriate ContextType typedef to the class, you can pass a -/// context field during construction of the VarStreamArray that will be -/// passed to each call to extract. -/// -template <typename ValueType, typename ExtractorType> -class VarStreamArrayIterator; - -template <typename ValueType, - typename ExtractorType = VarStreamArrayExtractor<ValueType>> -class VarStreamArray { -public: - typedef typename ExtractorType::ContextType ContextType; - typedef VarStreamArrayIterator<ValueType, ExtractorType> Iterator; - friend Iterator; - - VarStreamArray() = default; - - explicit VarStreamArray(BinaryStreamRef Stream, - ContextType *Context = nullptr) - : Stream(Stream), Context(Context) {} - - VarStreamArray(const VarStreamArray<ValueType, ExtractorType> &Other) - : Stream(Other.Stream), Context(Other.Context) {} - - Iterator begin(bool *HadError = nullptr) const { - if (empty()) - return end(); - - return Iterator(*this, Context, HadError); - } - - Iterator end() const { return Iterator(); } - - bool empty() const { return Stream.getLength() == 0; } - - /// \brief given an offset into the array's underlying stream, return an - /// iterator to the record at that offset. This is considered unsafe - /// since the behavior is undefined if \p Offset does not refer to the - /// beginning of a valid record. - Iterator at(uint32_t Offset) const { - return Iterator(*this, Context, Stream.drop_front(Offset), nullptr); - } - - BinaryStreamRef getUnderlyingStream() const { return Stream; } + // with one of the following two methods implemented. 
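For a concrete picture, a hedged sketch of a stateless specialization under the new scheme (MyRecord is hypothetical; a stateful extractor would instead typedef its real ContextType and implement the four-argument form, receiving the context through the readArray overload added to BinaryStreamReader later in this patch):

  struct MyRecord { uint32_t Value; };

  template <> struct VarStreamArrayExtractor<MyRecord> {
    typedef void ContextType; // stateless: the three-argument extract applies
    static Error extract(BinaryStreamRef Stream, uint32_t &Len, MyRecord &Item) {
      BinaryStreamReader Reader(Stream);
      if (auto EC = Reader.readInteger(Item.Value))
        return EC;
      Len = sizeof(uint32_t); // this record type happens to be fixed-size
      return Error::success();
    }
  };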
+ static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item) = delete; -private: - BinaryStreamRef Stream; - ContextType *Context = nullptr; + static Error extract(BinaryStreamRef Stream, uint32_t &Len, T &Item, + const ContextType &Ctx) = delete; }; -template <typename ValueType, typename ExtractorType> +template <typename ArrayType, typename Value, typename Extractor, + typename WrappedCtx> class VarStreamArrayIterator : public iterator_facade_base< - VarStreamArrayIterator<ValueType, ExtractorType>, - std::forward_iterator_tag, ValueType> { - typedef typename ExtractorType::ContextType ContextType; - typedef VarStreamArrayIterator<ValueType, ExtractorType> IterType; - typedef VarStreamArray<ValueType, ExtractorType> ArrayType; + VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx>, + std::forward_iterator_tag, Value> { + typedef VarStreamArrayIterator<ArrayType, Value, Extractor, WrappedCtx> + IterType; public: - VarStreamArrayIterator(const ArrayType &Array, ContextType *Context, + VarStreamArrayIterator() = default; + VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, BinaryStreamRef Stream, bool *HadError = nullptr) - : IterRef(Stream), Context(Context), Array(&Array), HadError(HadError) { + : IterRef(Stream), Ctx(&Ctx), Array(&Array), HadError(HadError) { if (IterRef.getLength() == 0) moveToEnd(); else { - auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context); + auto EC = Ctx.template invoke<Extractor>(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -142,11 +77,13 @@ public: } } - VarStreamArrayIterator(const ArrayType &Array, ContextType *Context, + VarStreamArrayIterator(const ArrayType &Array, const WrappedCtx &Ctx, bool *HadError = nullptr) - : VarStreamArrayIterator(Array, Context, Array.Stream, HadError) {} + : VarStreamArrayIterator(Array, Ctx, Array.Stream, HadError) {} + + VarStreamArrayIterator(const WrappedCtx &Ctx) : Ctx(&Ctx) {} + VarStreamArrayIterator(const VarStreamArrayIterator &Other) = default; - VarStreamArrayIterator() = default; ~VarStreamArrayIterator() = default; bool operator==(const IterType &R) const { @@ -164,12 +101,12 @@ public: return false; } - const ValueType &operator*() const { + const Value &operator*() const { assert(Array && !HasError); return ThisValue; } - ValueType &operator*() { + Value &operator*() { assert(Array && !HasError); return ThisValue; } @@ -185,7 +122,7 @@ public: moveToEnd(); } else { // There is some data after the current record. 
- auto EC = ExtractorType::extract(IterRef, ThisLen, ThisValue, Context); + auto EC = Ctx->template invoke<Extractor>(IterRef, ThisLen, ThisValue); if (EC) { consumeError(std::move(EC)); markError(); @@ -210,15 +147,136 @@ private: *HadError = true; } - ValueType ThisValue; + Value ThisValue; BinaryStreamRef IterRef; - ContextType *Context{nullptr}; + const WrappedCtx *Ctx{nullptr}; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; bool HasError{false}; bool *HadError{nullptr}; }; +template <typename T, typename Context> struct ContextWrapper { + ContextWrapper() = default; + + explicit ContextWrapper(Context &&Ctx) : Ctx(Ctx) {} + + template <typename Extractor> + Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { + return Extractor::extract(Stream, Len, Item, Ctx); + } + + Context Ctx; +}; + +template <typename T> struct ContextWrapper<T, void> { + ContextWrapper() = default; + + template <typename Extractor> + Error invoke(BinaryStreamRef Stream, uint32_t &Len, T &Item) const { + return Extractor::extract(Stream, Len, Item); + } +}; + +/// VarStreamArray represents an array of variable length records backed by a +/// stream. This could be a contiguous sequence of bytes in memory, it could +/// be a file on disk, or it could be a PDB stream where bytes are stored as +/// discontiguous blocks in a file. Usually it is desirable to treat arrays +/// as contiguous blocks of memory, but doing so with large PDB files, for +/// example, could mean allocating huge amounts of memory just to allow +/// re-ordering of stream data to be contiguous before iterating over it. By +/// abstracting this out, we need not duplicate this memory, and we can +/// iterate over arrays in arbitrarily formatted streams. Elements are parsed +/// lazily on iteration, so there is no upfront cost associated with building +/// or copying a VarStreamArray, no matter how large it may be. +/// +/// You create a VarStreamArray by specifying a ValueType and an Extractor type. +/// If you do not specify an Extractor type, you are expected to specialize +/// VarStreamArrayExtractor<T> for your ValueType. +/// +/// The default extractor type is stateless, but by specializing +/// VarStreamArrayExtractor or defining your own custom extractor type and +/// adding the appropriate ContextType typedef to the class, you can pass a +/// context field during construction of the VarStreamArray that will be +/// passed to each call to extract. +/// +template <typename Value, typename Extractor, typename WrappedCtx> +class VarStreamArrayBase { + typedef VarStreamArrayBase<Value, Extractor, WrappedCtx> MyType; + +public: + typedef VarStreamArrayIterator<MyType, Value, Extractor, WrappedCtx> Iterator; + friend Iterator; + + VarStreamArrayBase() = default; + + VarStreamArrayBase(BinaryStreamRef Stream, const WrappedCtx &Ctx) + : Stream(Stream), Ctx(Ctx) {} + + VarStreamArrayBase(const MyType &Other) + : Stream(Other.Stream), Ctx(Other.Ctx) {} + + Iterator begin(bool *HadError = nullptr) const { + if (empty()) + return end(); + + return Iterator(*this, Ctx, Stream, HadError); + } + + bool valid() const { return Stream.valid(); } + + Iterator end() const { return Iterator(Ctx); } + + bool empty() const { return Stream.getLength() == 0; } + + /// \brief given an offset into the array's underlying stream, return an + /// iterator to the record at that offset. This is considered unsafe + /// since the behavior is undefined if \p Offset does not refer to the + /// beginning of a valid record. 
+ Iterator at(uint32_t Offset) const { + return Iterator(*this, Ctx, Stream.drop_front(Offset), nullptr); + } + + BinaryStreamRef getUnderlyingStream() const { return Stream; } + +private: + BinaryStreamRef Stream; + WrappedCtx Ctx; +}; + +template <typename Value, typename Extractor, typename Context> +class VarStreamArrayImpl + : public VarStreamArrayBase<Value, Extractor, + ContextWrapper<Value, Context>> { + typedef ContextWrapper<Value, Context> WrappedContext; + typedef VarStreamArrayImpl<Value, Extractor, Context> MyType; + typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType; + +public: + typedef Context ContextType; + + VarStreamArrayImpl() = default; + VarStreamArrayImpl(BinaryStreamRef Stream, Context &&Ctx) + : BaseType(Stream, WrappedContext(std::forward<Context>(Ctx))) {} +}; + +template <typename Value, typename Extractor> +class VarStreamArrayImpl<Value, Extractor, void> + : public VarStreamArrayBase<Value, Extractor, ContextWrapper<Value, void>> { + typedef ContextWrapper<Value, void> WrappedContext; + typedef VarStreamArrayImpl<Value, Extractor, void> MyType; + typedef VarStreamArrayBase<Value, Extractor, WrappedContext> BaseType; + +public: + VarStreamArrayImpl() = default; + VarStreamArrayImpl(BinaryStreamRef Stream) + : BaseType(Stream, WrappedContext()) {} +}; + +template <typename Value, typename Extractor = VarStreamArrayExtractor<Value>> +using VarStreamArray = + VarStreamArrayImpl<Value, Extractor, typename Extractor::ContextType>; + template <typename T> class FixedStreamArrayIterator; /// FixedStreamArray is similar to VarStreamArray, except with each record diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h index f30d82d81b25..77738077f5ff 100644 --- a/include/llvm/Support/BinaryStreamReader.h +++ b/include/llvm/Support/BinaryStreamReader.h @@ -31,6 +31,7 @@ namespace llvm { /// are overridable. class BinaryStreamReader { public: + BinaryStreamReader() = default; explicit BinaryStreamReader(BinaryStreamRef Stream); virtual ~BinaryStreamReader() {} @@ -172,13 +173,29 @@ public: /// \returns a success error code if the data was successfully read, otherwise /// returns an appropriate error code. template <typename T, typename U> - Error - readArray(VarStreamArray<T, U> &Array, uint32_t Size, - typename VarStreamArray<T, U>::ContextType *Context = nullptr) { + Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) { BinaryStreamRef S; if (auto EC = readStreamRef(S, Size)) return EC; - Array = VarStreamArray<T, U>(S, Context); + Array = VarStreamArray<T, U>(S); + return Error::success(); + } + + /// Read a VarStreamArray of size \p Size bytes and store the result into + /// \p Array. Updates the stream's offset to point after the newly read + /// array. Never causes a copy (although iterating the elements of the + /// VarStreamArray may, depending upon the implementation of the underlying + /// stream). + /// + /// \returns a success error code if the data was successfully read, otherwise + /// returns an appropriate error code. + template <typename T, typename U, typename ContextType> + Error readArray(VarStreamArray<T, U> &Array, uint32_t Size, + ContextType &&Context) { + BinaryStreamRef S; + if (auto EC = readStreamRef(S, Size)) + return EC; + Array = VarStreamArray<T, U>(S, std::move(Context)); return Error::success(); } @@ -227,6 +244,9 @@ public: /// \returns the next byte in the stream. 
uint8_t peek() const; + std::pair<BinaryStreamReader, BinaryStreamReader> + split(uint32_t Offset) const; + private: BinaryStreamRef Stream; uint32_t Offset; diff --git a/include/llvm/Support/BinaryStreamRef.h b/include/llvm/Support/BinaryStreamRef.h index 23ce02fd7ca4..465e724a6886 100644 --- a/include/llvm/Support/BinaryStreamRef.h +++ b/include/llvm/Support/BinaryStreamRef.h @@ -98,6 +98,9 @@ public: BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset, uint32_t Length) = delete; + /// Check if a Stream is valid. + bool valid() const { return Stream != nullptr; } + /// Given an Offset into this StreamRef and a Size, return a reference to a /// buffer owned by the stream. /// diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h index 6734a797ccc4..1b61c32a2541 100644 --- a/include/llvm/Support/BinaryStreamWriter.h +++ b/include/llvm/Support/BinaryStreamWriter.h @@ -20,6 +20,7 @@ #include "llvm/Support/Error.h" #include <cstdint> #include <type_traits> +#include <utility> namespace llvm { @@ -30,8 +31,6 @@ namespace llvm { /// although no methods are overridable. class BinaryStreamWriter { public: - // FIXME: We should be able to slice and drop_front etc on Writers / Readers. - BinaryStreamWriter() = default; explicit BinaryStreamWriter(WritableBinaryStreamRef Stream); virtual ~BinaryStreamWriter() {} @@ -152,6 +151,9 @@ public: return writeStreamRef(Array.getUnderlyingStream()); } + /// Splits the Writer into two Writers at a given offset. + std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint32_t Off) const; + void setOffset(uint32_t Off) { Offset = Off; } uint32_t getOffset() const { return Offset; } uint32_t getLength() const { return Stream.getLength(); } diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h index 2d1180c228e3..380b628fd95f 100644 --- a/include/llvm/Support/DataExtractor.h +++ b/include/llvm/Support/DataExtractor.h @@ -58,6 +58,28 @@ public: /// NULL will be returned. const char *getCStr(uint32_t *offset_ptr) const; + /// Extract a C string from \a *OffsetPtr. + /// + /// Returns a StringRef for the C String from the data at the offset + /// pointed to by \a OffsetPtr. A variable length NULL terminated C + /// string will be extracted and the \a OffsetPtr will be + /// updated with the offset of the byte that follows the NULL + /// terminator byte. + /// + /// \param[in,out] OffsetPtr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// \return + /// A StringRef for the C string value in the data. If the offset + /// pointed to by \a OffsetPtr is out of bounds, or if the + /// offset plus the length of the C string is out of bounds, + /// a default-initialized StringRef will be returned. + StringRef getCStrRef(uint32_t *OffsetPtr) const; + /// Extract an unsigned integer of size \a byte_size from \a /// *offset_ptr. 
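A hedged usage sketch of the getCStrRef helper documented above (the buffer contents and offsets are made up):

  DataExtractor DE(StringRef("foo\0bar\0", 8), /*IsLittleEndian=*/true,
                   /*AddressSize=*/4);
  uint32_t Offset = 0;
  StringRef A = DE.getCStrRef(&Offset); // A == "foo", Offset advances to 4
  StringRef B = DE.getCStrRef(&Offset); // B == "bar", Offset advances to 8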
/// diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/Support/ELFRelocs/Hexagon.def index 74e1d405cebd..5021e2b26ce5 100644 --- a/include/llvm/Support/ELFRelocs/Hexagon.def +++ b/include/llvm/Support/ELFRelocs/Hexagon.def @@ -99,3 +99,8 @@ ELF_RELOC(R_HEX_LD_GOT_32_6_X, 91) ELF_RELOC(R_HEX_LD_GOT_16_X, 92) ELF_RELOC(R_HEX_LD_GOT_11_X, 93) ELF_RELOC(R_HEX_23_REG, 94) +ELF_RELOC(R_HEX_GD_PLT_B22_PCREL_X, 95) +ELF_RELOC(R_HEX_GD_PLT_B32_PCREL_X, 96) +ELF_RELOC(R_HEX_LD_PLT_B22_PCREL_X, 97) +ELF_RELOC(R_HEX_LD_PLT_B32_PCREL_X, 98) +ELF_RELOC(R_HEX_27_REG, 99) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index aa9230044b1f..ced183852146 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2061,6 +2061,14 @@ public: return false; } + // Return true if the instruction that performs a << b actually performs + // a << (b % (sizeof(a) * 8)). + virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const { + assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) && + "Expect a shift instruction"); + return false; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h index 06410bf01dd6..a8fb12b72568 100644 --- a/lib/Analysis/CFLGraph.h +++ b/lib/Analysis/CFLGraph.h @@ -429,7 +429,7 @@ template <typename CFLAA> class CFLGraphBuilder { if (Inst->getType()->isPointerTy()) { auto *Fn = CS.getCalledFunction(); - if (Fn == nullptr || !Fn->doesNotAlias(AttributeList::ReturnIndex)) + if (Fn == nullptr || !Fn->returnDoesNotAlias()) // No need to call addNode() since we've added Inst at the // beginning of this function and we know it is not a global. Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 2f25a1183668..7aa6abf8fa48 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -4056,13 +4056,20 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); unsigned InVecNumElts = InVecTy->getVectorNumElements(); + auto *Op0Const = dyn_cast<Constant>(Op0); + auto *Op1Const = dyn_cast<Constant>(Op1); + + // If all operands are constant, constant fold the shuffle. + if (Op0Const && Op1Const) + return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); + SmallVector<int, 32> Indices; ShuffleVectorInst::getShuffleMask(Mask, Indices); assert(MaskNumElts == Indices.size() && "Size of Indices not same as number of mask elements?"); - // Canonicalization: If mask does not select elements from an input vector, - // replace that input vector with undef. + // If only one of the operands is constant, constant fold the shuffle if the + // mask does not select elements from the variable operand. bool MaskSelects0 = false, MaskSelects1 = false; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Indices[i] == -1) @@ -4072,39 +4079,23 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, else MaskSelects1 = true; } - if (!MaskSelects0) - Op0 = UndefValue::get(InVecTy); - if (!MaskSelects1) - Op1 = UndefValue::get(InVecTy); - - auto *Op0Const = dyn_cast<Constant>(Op0); - auto *Op1Const = dyn_cast<Constant>(Op1); - - // If all operands are constant, constant fold the shuffle. 
- if (Op0Const && Op1Const) - return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); - - // Canonicalization: if only one input vector is constant, it shall be the - // second one. - if (Op0Const && !Op1Const) { - std::swap(Op0, Op1); - for (auto &Idx : Indices) { - if (Idx == -1) - continue; - Idx = Idx < (int)MaskNumElts ? Idx + MaskNumElts : Idx - MaskNumElts; - } - Mask = ConstantDataVector::get( - Mask->getContext(), - makeArrayRef(reinterpret_cast<uint32_t *>(Indices.data()), - MaskNumElts)); - } + if (!MaskSelects0 && Op1Const) + return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy), + Op1Const, Mask); + if (!MaskSelects1 && Op0Const) + return ConstantFoldShuffleVectorInstruction(Op0Const, + UndefValue::get(InVecTy), Mask); // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0)) - if (isa<UndefValue>(Op1) && RetTy == InVecTy && + if (!MaskSelects1 && RetTy == InVecTy && OpShuf->getMask()->getSplatValue()) return Op0; + if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op1)) + if (!MaskSelects0 && RetTy == InVecTy && + OpShuf->getMask()->getSplatValue()) + return Op1; // Don't fold a shuffle with undef mask elements. This may get folded in a // better way using demanded bits or other analysis. diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index f6d9a73e4e9a..a83412506a07 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -451,12 +451,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( auto &Summary = GlobalList.second[0]; bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { - // If a global value definition references an unnamed global, - // be conservative. They're valid IR so we don't want to crash - // when we encounter any of them but they're infrequent enough - // that we don't bother optimizing them. - if (!VI.getValue()->hasName()) - return false; return !CantBePromoted.count(VI.getValue()->getGUID()); }); if (!AllRefsCanBeExternallyReferenced) { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index dc151f232670..6ec175fc84e2 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3320,67 +3320,10 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); const Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->isSpeculatable()) - return true; - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects and just mark - // information about their operands. - // FIXME: There are other no-op synthetic instructions that potentially - // should be considered at least *safe* to speculate... - // FIXME: The speculatable attribute should be added to all these - // intrinsics and this case statement should be removed. 
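That FIXME is exactly what this patch resolves: the Intrinsics.td hunks earlier add IntrSpeculatable to these definitions, so the whitelist below collapses into the single isSpeculatable() query at the end of the hunk. A hedged sketch of the same query from other client code (CI and Worklist are hypothetical):

  if (const Function *Callee = CI->getCalledFunction())
    if (Callee->isSpeculatable()) // e.g. true for llvm.ctpop.* after this change
      Worklist.push_back(CI);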
- case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - return true; - case Intrinsic::bitreverse: - case Intrinsic::bswap: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::objectsize: - case Intrinsic::sadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::usub_with_overflow: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions except for setting errno. - case Intrinsic::sqrt: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, and the corresponding libm functions never set errno. - case Intrinsic::trunc: - case Intrinsic::copysign: - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, which never overflow when operating on the IEEE754 types - // that we support, and never set errno otherwise. - case Intrinsic::ceil: - case Intrinsic::floor: - case Intrinsic::nearbyint: - case Intrinsic::rint: - case Intrinsic::round: - return true; - // These intrinsics do not correspond to any libm function, and - // do not set errno. - case Intrinsic::powi: - return true; - // TODO: are convert_{from,to}_fp16 safe? - // TODO: can we list target-specific intrinsics here? - default: break; - } - } - return false; // The called function could have undefined behavior or - // side-effects, even if marked readnone nounwind. + // The called function could have undefined behavior or side-effects, even + // if marked readnone nounwind. + return Callee && Callee->isSpeculatable(); } case Instruction::VAArg: case Instruction::Alloca: diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 8aa7d0daf070..485d9b6ac0bc 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,146 +340,28 @@ public: // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. - for (const auto &I : *this) + forEachSummary([&](GVInfo I) { GUIDToValueIdMap[I.first] = ++GlobalValueId; + }); } /// The below iterator returns the GUID and associated summary. typedef std::pair<GlobalValue::GUID, GlobalValueSummary *> GVInfo; - /// Iterator over the value GUID and summaries to be written to bitcode, - /// hides the details of whether they are being pulled from the entire - /// index or just those in a provided ModuleToSummariesForIndex map. - class iterator - : public llvm::iterator_facade_base<iterator, std::forward_iterator_tag, - GVInfo> { - /// Enables access to parent class. - const IndexBitcodeWriter &Writer; - - // Iterators used when writing only those summaries in a provided - // ModuleToSummariesForIndex map: - - /// Points to the last element in outer ModuleToSummariesForIndex map. - std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesBack; - /// Iterator on outer ModuleToSummariesForIndex map. - std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesIter; - /// Iterator on an inner global variable summary map. 
- GVSummaryMapTy::const_iterator ModuleGVSummariesIter; - - // Iterators used when writing all summaries in the index: - - /// Points to the last element in the Index outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesBack; - /// Iterator on outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesIter; - /// Iterator on an inner GlobalValueSummaryList. - GlobalValueSummaryList::const_iterator IndexGVSummariesIter; - - public: - /// Construct iterator from parent \p Writer and indicate if we are - /// constructing the end iterator. - iterator(const IndexBitcodeWriter &Writer, bool IsAtEnd) : Writer(Writer) { - // Set up the appropriate set of iterators given whether we are writing - // the full index or just a subset. - // Can't setup the Back or inner iterators if the corresponding map - // is empty. This will be handled specially in operator== as well. - if (Writer.ModuleToSummariesForIndex && - !Writer.ModuleToSummariesForIndex->empty()) { - for (ModuleSummariesBack = Writer.ModuleToSummariesForIndex->begin(); - std::next(ModuleSummariesBack) != - Writer.ModuleToSummariesForIndex->end(); - ModuleSummariesBack++) - ; - ModuleSummariesIter = !IsAtEnd - ? Writer.ModuleToSummariesForIndex->begin() - : ModuleSummariesBack; - ModuleGVSummariesIter = !IsAtEnd ? ModuleSummariesIter->second.begin() - : ModuleSummariesBack->second.end(); - } else if (!Writer.ModuleToSummariesForIndex && - Writer.Index.begin() != Writer.Index.end()) { - for (IndexSummariesBack = Writer.Index.begin(); - std::next(IndexSummariesBack) != Writer.Index.end(); - IndexSummariesBack++) - ; - IndexSummariesIter = - !IsAtEnd ? Writer.Index.begin() : IndexSummariesBack; - IndexGVSummariesIter = !IsAtEnd ? IndexSummariesIter->second.begin() - : IndexSummariesBack->second.end(); - } - } - - /// Increment the appropriate set of iterators. - iterator &operator++() { - // First the inner iterator is incremented, then if it is at the end - // and there are more outer iterations to go, the inner is reset to - // the start of the next inner list. - if (Writer.ModuleToSummariesForIndex) { - ++ModuleGVSummariesIter; - if (ModuleGVSummariesIter == ModuleSummariesIter->second.end() && - ModuleSummariesIter != ModuleSummariesBack) { - ++ModuleSummariesIter; - ModuleGVSummariesIter = ModuleSummariesIter->second.begin(); - } - } else { - ++IndexGVSummariesIter; - if (IndexGVSummariesIter == IndexSummariesIter->second.end() && - IndexSummariesIter != IndexSummariesBack) { - ++IndexSummariesIter; - IndexGVSummariesIter = IndexSummariesIter->second.begin(); - } - } - return *this; - } - - /// Access the <GUID,GlobalValueSummary*> pair corresponding to the current - /// outer and inner iterator positions. - GVInfo operator*() { - if (Writer.ModuleToSummariesForIndex) - return std::make_pair(ModuleGVSummariesIter->first, - ModuleGVSummariesIter->second); - return std::make_pair(IndexSummariesIter->first, - IndexGVSummariesIter->get()); - } - - /// Checks if the iterators are equal, with special handling for empty - /// indexes. - bool operator==(const iterator &RHS) const { - if (Writer.ModuleToSummariesForIndex) { - // First ensure that both are writing the same subset. - if (Writer.ModuleToSummariesForIndex != - RHS.Writer.ModuleToSummariesForIndex) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. 
- if (Writer.ModuleToSummariesForIndex->empty()) - return true; - // Ensure the ModuleGVSummariesIter are iterating over the same - // container before checking them below. - if (ModuleSummariesIter != RHS.ModuleSummariesIter) - return false; - return ModuleGVSummariesIter == RHS.ModuleGVSummariesIter; - } - // First ensure RHS also writing the full index, and that both are - // writing the same full index. - if (RHS.Writer.ModuleToSummariesForIndex || - &Writer.Index != &RHS.Writer.Index) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. - if (Writer.Index.begin() == Writer.Index.end()) - return true; - // Ensure the IndexGVSummariesIter are iterating over the same - // container before checking them below. - if (IndexSummariesIter != RHS.IndexSummariesIter) - return false; - return IndexGVSummariesIter == RHS.IndexGVSummariesIter; + /// Calls the callback for each value GUID and summary to be written to + /// bitcode. This hides the details of whether they are being pulled from the + /// entire index or just those in a provided ModuleToSummariesForIndex map. + void forEachSummary(std::function<void(GVInfo)> Callback) { + if (ModuleToSummariesForIndex) { + for (auto &M : *ModuleToSummariesForIndex) + for (auto &Summary : M.second) + Callback(Summary); + } else { + for (auto &Summaries : Index) + for (auto &Summary : Summaries.second) + Callback({Summaries.first, Summary.get()}); } - }; - - /// Obtain the start iterator over the summaries to be written. - iterator begin() { return iterator(*this, /*IsAtEnd=*/false); } - /// Obtain the end iterator over the summaries to be written. - iterator end() { return iterator(*this, /*IsAtEnd=*/true); } + } /// Main entry point for writing a combined index to bitcode. void write(); @@ -3528,16 +3410,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION}); // Create value IDs for undefined references. - for (const auto &I : *this) { + forEachSummary([&](GVInfo I) { if (auto *VS = dyn_cast<GlobalVarSummary>(I.second)) { for (auto &RI : VS->refs()) assignValueId(RI.getGUID()); - continue; + return; } auto *FS = dyn_cast<FunctionSummary>(I.second); if (!FS) - continue; + return; for (auto &RI : FS->refs()) assignValueId(RI.getGUID()); @@ -3553,7 +3435,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { } assignValueId(GUID); } - } + }); for (const auto &GVI : valueIds()) { Stream.EmitRecord(bitc::FS_VALUE_GUID, @@ -3624,7 +3506,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.clear(); }; - for (const auto &I : *this) { + forEachSummary([&](GVInfo I) { GlobalValueSummary *S = I.second; assert(S); @@ -3636,7 +3518,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // Will process aliases as a post-pass because the reader wants all // global to be loaded first. 
Aliases.push_back(AS); - continue; + return; } if (auto *VS = dyn_cast<GlobalVarSummary>(S)) { @@ -3652,7 +3534,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { FSModRefsAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - continue; + return; } auto *FS = cast<FunctionSummary>(S); @@ -3700,7 +3582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - } + }); for (auto *AS : Aliases) { auto AliasValueId = SummaryToValueIdMap[AS]; diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index ebfe6cb3b733..be0c5c2bb70e 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -37,7 +37,7 @@ bool CallLowering::lowerCall( for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; - setArgFlags(OrigArg, i + 1, DL, CS); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); OrigArgs.push_back(OrigArg); ++i; } @@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (FuncInfo.getParamAlignment(OpIdx - 1)) - FrameAlign = FuncInfo.getParamAlignment(OpIdx - 1); + if (FuncInfo.getParamAlignment(OpIdx - 2)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); Arg.Flags.setByValAlign(FrameAlign); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 570a0cd0ba90..549f07ecd9ce 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -761,6 +761,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons agains MinCSFrameIndex. 
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { + if (MFI.isDeadObjectIndex(i)) + continue; + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dc0276d57667..03698ac862af 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -242,6 +242,7 @@ namespace { SDValue visitUSUBO(SDNode *N); SDValue visitADDE(SDNode *N); SDValue visitADDCARRY(SDNode *N); + SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); @@ -2142,6 +2143,24 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { if (isNullConstant(CarryIn)) return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1); + if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) + return Combined; + + if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, + SDNode *N) { + // Iff the flag result is dead: + // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) + if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) && + isNullConstant(N1) && !N->hasAnyUseOfValue(1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), + N0.getOperand(0), N0.getOperand(1), CarryIn); + return SDValue(); } @@ -5294,6 +5313,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } + // If the target supports masking y in (shl, y), + // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y) + if (TLI.isOperationLegal(ISD::SHL, VT) && + TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 @@ -5492,6 +5522,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (sra, y), + // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y) + if (TLI.isOperationLegal(ISD::SRA, VT) && + TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // Arithmetic shifting an all-sign-bit value is a no-op. 
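The new SHL/SRA/SRL folds above only fire when a target opts in through the supportsModuloShift hook declared in TargetLowering.h earlier in this patch. A hedged sketch of an override for a target whose shifters ignore all but the low bits of the shift amount (MyTargetLowering is hypothetical):

  bool MyTargetLowering::supportsModuloShift(ISD::NodeType Inst,
                                             EVT ReturnType) const {
    assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
           "Expect a shift instruction");
    // The shifter only consumes log2(bitwidth) bits of the amount, so
    // (x shift (y & (bits - 1))) is the same as (x shift y).
    return ReturnType == MVT::i32 || ReturnType == MVT::i64;
  }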
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; @@ -5650,6 +5691,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); + // If the target supports masking y in (srl, y), + // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y) + if (TLI.isOperationLegal(ISD::SRL, VT) && + TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) { + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { + if (Mask->getZExtValue() == OpSizeInBits - 1) { + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0)); + } + } + } + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 421f22ca5d8d..410d5a3777d4 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMDebugInfoCodeView ModuleDebugLineFragment.cpp ModuleDebugUnknownFragment.cpp RecordSerialization.cpp + StringTable.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp index c349e7ecce96..42f0afc3e2d7 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/BinaryStreamReader.h" using namespace llvm; @@ -25,7 +26,7 @@ struct FileChecksumEntryHeader { }; Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract( - BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item, void *Ctx) { + BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { BinaryStreamReader Reader(Stream); const FileChecksumEntryHeader *Header; @@ -49,10 +50,12 @@ Error ModuleDebugFileChecksumFragmentRef::initialize( return Error::success(); } -ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums) {} +ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment( + StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums), + Strings(Strings) {} -void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, +void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName, FileChecksumKind Kind, ArrayRef<uint8_t> Bytes) { FileChecksumEntry Entry; @@ -61,13 +64,14 @@ void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, ::memcpy(Copy, Bytes.data(), Bytes.size()); Entry.Checksum = makeArrayRef(Copy, Bytes.size()); } - Entry.FileNameOffset = StringTableOffset; + + Entry.FileNameOffset = Strings.insert(FileName); Entry.Kind = Kind; Checksums.push_back(Entry); // This maps the offset of this string in the string table to the offset // of this checksum entry in the checksum buffer. 
- OffsetMap[StringTableOffset] = SerializedSize; + OffsetMap[Entry.FileNameOffset] = SerializedSize; assert(SerializedSize % 4 == 0); uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4); @@ -94,9 +98,10 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -uint32_t ModuleDebugFileChecksumFragment::mapChecksumOffset( - uint32_t StringTableOffset) const { - auto Iter = OffsetMap.find(StringTableOffset); +uint32_t +ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const { + uint32_t Offset = Strings.getStringId(FileName); + auto Iter = OffsetMap.find(Offset); assert(Iter != OffsetMap.end()); return Iter->second; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp index 483f7cb5c5ad..cb6a8478797f 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp @@ -10,20 +10,22 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; Error VarStreamArrayExtractor<InlineeSourceLine>::extract( BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item, - ContextType *Fragment) { + bool HasExtraFiles) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Item.Header)) return EC; - if (Fragment->hasExtraFiles()) { + if (HasExtraFiles) { uint32_t ExtraFileCount; if (auto EC = Reader.readInteger(ExtraFileCount)) return EC; @@ -42,7 +44,8 @@ Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readEnum(Signature)) return EC; - if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining(), this)) + if (auto EC = + Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles())) return EC; assert(Reader.bytesRemaining() == 0); @@ -54,9 +57,9 @@ bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const { } ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment( - bool HasExtraFiles) + ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles) : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines), - HasExtraFiles(HasExtraFiles) {} + Checksums(Checksums), HasExtraFiles(HasExtraFiles) {} uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { // 4 bytes for the signature @@ -99,18 +102,22 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -void ModuleDebugInlineeLineFragment::addExtraFile(uint32_t FileOffset) { +void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + auto &Entry = Entries.back(); - Entry.ExtraFiles.push_back(ulittle32_t(FileOffset)); + Entry.ExtraFiles.push_back(ulittle32_t(Offset)); ++ExtraFileCount; } void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId, - uint32_t FileOffset, + StringRef FileName, uint32_t SourceLine) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + Entries.emplace_back(); auto &Entry = Entries.back(); - Entry.Header.FileID = FileOffset; + Entry.Header.FileID = Offset; Entry.Header.SourceLineNum = SourceLine; Entry.Header.Inlinee = FuncId; } diff --git 
a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp index 103010ca2833..e0ee934709ba 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp @@ -10,7 +10,9 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; @@ -65,11 +67,15 @@ bool ModuleDebugLineFragmentRef::hasColumnInfo() const { return !!(Header->Flags & LF_HaveColumns); } -ModuleDebugLineFragment::ModuleDebugLineFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::Lines) {} +ModuleDebugLineFragment::ModuleDebugLineFragment( + ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::Lines), + Checksums(Checksums) {} -void ModuleDebugLineFragment::createBlock(uint32_t ChecksumBufferOffset) { - Blocks.emplace_back(ChecksumBufferOffset); +void ModuleDebugLineFragment::createBlock(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Blocks.emplace_back(Offset); } void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, diff --git a/lib/DebugInfo/CodeView/StringTable.cpp b/lib/DebugInfo/CodeView/StringTable.cpp new file mode 100644 index 000000000000..21f11204686b --- /dev/null +++ b/lib/DebugInfo/CodeView/StringTable.cpp @@ -0,0 +1,71 @@ +//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringTable.h" + +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringTableRef::StringTableRef() {} + +Error StringTableRef::initialize(BinaryStreamRef Contents) { + Stream = Contents; + return Error::success(); +} + +Expected<StringRef> StringTableRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + if (auto EC = Reader.readCString(Result)) + return std::move(EC); + return Result; +} + +uint32_t StringTable::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. 
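This insert() is what the rewritten CodeView fragments above build on: the checksum fragment interns file names into the StringTable, and the line and inlinee fragments then refer to files by name instead of by raw table offset. A hedged end-to-end sketch (namespaces omitted; ChecksumBytes is a hypothetical ArrayRef<uint8_t>):

  StringTable Strings;
  ModuleDebugFileChecksumFragment Checksums(Strings);
  Checksums.addChecksum("a.cpp", FileChecksumKind::MD5, ChecksumBytes);

  ModuleDebugLineFragment Lines(Checksums, Strings);
  Lines.createBlock("a.cpp"); // resolved through Checksums.mapChecksumOffset("a.cpp")

addChecksum records each name's offset in OffsetMap, which is what mapChecksumOffset consults when a fragment later asks for a file by name.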
+ if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t StringTable::calculateSerializedSize() const { return StringSize; } + +Error StringTable::commit(BinaryStreamWriter &Writer) const { + assert(Writer.bytesRemaining() == StringSize); + uint32_t MaxOffset = 1; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + MaxOffset = std::max<uint32_t>(MaxOffset, Offset + S.size() + 1); + } + + Writer.setOffset(MaxOffset); + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +uint32_t StringTable::size() const { return Strings.size(); } + +uint32_t StringTable::getStringId(StringRef S) const { + auto P = Strings.find(S); + assert(P != Strings.end()); + return P->second; +} diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 134471e81cac..5395e4349b28 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -369,14 +370,14 @@ Error CVSymbolDumperImpl::visitKnownRecord( DictScope S(W, "DefRangeSubfield"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRangeSubfield.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent); printLocalVariableAddrRange(DefRangeSubfield.Range, @@ -390,14 +391,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DictScope S(W, "DefRange"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRange.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRange.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset()); printLocalVariableAddrGap(DefRange.Gaps); diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt index 495edb7b48db..6ca6e64bd8e6 100644 --- a/lib/DebugInfo/DWARF/CMakeLists.txt +++ b/lib/DebugInfo/DWARF/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMDebugInfoDWARF DWARFTypeUnit.cpp DWARFUnitIndex.cpp 
DWARFUnit.cpp + DWARFVerifier.cpp SyntaxHighlighting.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index b4ecbf805d1e..573d37d77fee 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" @@ -29,6 +29,7 @@ #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -42,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> +#include <map> +#include <set> #include <string> #include <utility> #include <vector> @@ -284,11 +287,30 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } -bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { - bool Success = true; - if (DumpType == DIDT_All || DumpType == DIDT_Info) { +DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { + parseCompileUnits(); + if (auto *CU = CUs.getUnitForOffset(Offset)) + return CU->getDIEForOffset(Offset); + return DWARFDie(); +} + +namespace { + +class Verifier { + raw_ostream &OS; + DWARFContext &DCtx; +public: + Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {} + + bool HandleDebugInfo() { + bool Success = true; + // A map that tracks all references (converted absolute references) so we + // can verify each reference points to a valid DIE and not an offset that + // lies between to valid DIEs. + std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; + OS << "Verifying .debug_info...\n"; - for (const auto &CU : compile_units()) { + for (const auto &CU : DCtx.compile_units()) { unsigned NumDies = CU->getNumDIEs(); for (unsigned I = 0; I < NumDies; ++I) { auto Die = CU->getDIEAtIndex(I); @@ -299,101 +321,231 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { const auto Attr = AttrValue.Attr; const auto Form = AttrValue.Value.getForm(); switch (Attr) { - case DW_AT_ranges: - // Make sure the offset in the DW_AT_ranges attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= getRangeSection().Data.size()) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. 
+ if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + Success = false; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { Success = false; - OS << "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:\n"; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; Die.dump(OS, 0); OS << "\n"; } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - case DW_AT_stmt_list: - // Make sure the offset in the DW_AT_stmt_list attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= getLineSection().Data.size()) { + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + Success = false; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + CU->getUnitDIE().dump(OS, 0); + OS << "\n"; + } + } else { Success = false; - OS << "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; - CU->getUnitDIE().dump(OS, 0); + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); OS << "\n"; } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - - default: - break; + break; + + default: + break; } switch (Form) { - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: { - // Verify all CU relative references are valid CU offsets. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - auto DieCU = Die.getDwarfUnit(); - auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); - auto CUOffset = AttrValue.Value.getRawUValue(); - if (CUOffset >= CUSize) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + Success = false; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if(*RefVal >= DCtx.getInfoSection().Data.size()) { + Success = false; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. 
+ ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { Success = false; - OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) - << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; Die.dump(OS, 0); OS << "\n"; } + break; } - break; + default: + break; } - case DW_FORM_ref_addr: { - // Verify all absolute DIE references have valid offsets in the - // .debug_info section. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal && *RefVal >= getInfoSection().Data.size()) { - Success = false; - OS << "error: DW_FORM_ref_addr offset beyond .debug_info " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - case DW_FORM_strp: { - auto SecOffset = AttrValue.Value.getAsSectionOffset(); - assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= getStringSection().size()) { - Success = false; - OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - default: - break; + } + } + } + + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair: ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + Success = false; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". Offset is in between DIEs:\n"; + for (auto Offset: Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } + return Success; + } + + bool HandleDebugLine() { + std::map<uint64_t, DWARFDie> StmtListToDie; + bool Success = true; + OS << "Verifying .debug_line...\n"; + for (const auto &CU : DCtx.compile_units()) { + uint32_t LineTableOffset = 0; + auto CUDie = CU->getUnitDIE(); + auto StmtFormValue = CUDie.find(DW_AT_stmt_list); + if (!StmtFormValue) { + // No line table for this compile unit. + continue; + } + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. + if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) { + LineTableOffset = *StmtSectionOffset; + if (LineTableOffset >= DCtx.getLineSection().Data.size()) { + // Make sure we don't get a valid line table back if the offset + // is wrong. + assert(DCtx.getLineTableForUnit(CU.get()) == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } else { + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + Success = false; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, CUDie.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + CUDie.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. 
+ continue; } + StmtListToDie[LineTableOffset] = CUDie; } } + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (!LineTable) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + CUDie.dump(OS, 0); + OS << '\n'; + continue; + } + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } } + return Success; + } +}; + +} // anonymous namespace + +bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { + bool Success = true; + DWARFVerifier verifier(OS, *this); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + if (!verifier.handleDebugInfo()) + Success = false; + } + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + if (!verifier.handleDebugLine()) + Success = false; } return Success; } diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 77f3c00cc03f..f32e8fe76357 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -26,11 +27,19 @@ using namespace llvm; using namespace dwarf; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +namespace { +struct ContentDescriptor { + dwarf::LineNumberEntryFormat Type; + dwarf::Form Form; +}; +typedef SmallVector<ContentDescriptor, 4> ContentDescriptors; +} // end anonmyous namespace DWARFDebugLine::Prologue::Prologue() { clear(); } void DWARFDebugLine::Prologue::clear() { TotalLength = Version = PrologueLength = 0; + AddressSize = SegSelectorSize = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; IsDWARF64 = false; @@ -43,6 +52,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) << format(" version: %u\n", Version) + << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize) + << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize) << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) @@ -74,6 +85,125 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { } } +// Parse v2-v4 directory and file tables. 
+static void +parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + while (*OffsetPtr < EndPrologueOffset) { + StringRef S = DebugLineData.getCStrRef(OffsetPtr); + if (S.empty()) + break; + IncludeDirectories.push_back(S); + } + + while (*OffsetPtr < EndPrologueOffset) { + StringRef Name = DebugLineData.getCStrRef(OffsetPtr); + if (Name.empty()) + break; + DWARFDebugLine::FileNameEntry FileEntry; + FileEntry.Name = Name; + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileNames.push_back(FileEntry); + } +} + +// Parse v5 directory/file entry content descriptions. +// Returns the descriptors, or an empty vector if we did not find a path or +// ran off the end of the prologue. +static ContentDescriptors +parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset) { + ContentDescriptors Descriptors; + int FormatCount = DebugLineData.getU8(OffsetPtr); + bool HasPath = false; + for (int I = 0; I != FormatCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return ContentDescriptors(); + ContentDescriptor Descriptor; + Descriptor.Type = + dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); + Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); + if (Descriptor.Type == dwarf::DW_LNCT_path) + HasPath = true; + Descriptors.push_back(Descriptor); + } + return HasPath ? Descriptors : ContentDescriptors(); +} + +static bool +parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + // Get the directory entry description. + ContentDescriptors DirDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (DirDescriptors.empty()) + return false; + + // Get the directory entries, according to the format described above. + int DirEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != DirEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + for (auto Descriptor : DirDescriptors) { + DWARFFormValue Value(Descriptor.Form); + switch (Descriptor.Type) { + case DW_LNCT_path: + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + IncludeDirectories.push_back(Value.getAsCString().getValue()); + break; + default: + if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + return false; + } + } + } + + // Get the file entry description. + ContentDescriptors FileDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (FileDescriptors.empty()) + return false; + + // Get the file entries, according to the format described above. 
+ int FileEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != FileEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + DWARFDebugLine::FileNameEntry FileEntry; + for (auto Descriptor : FileDescriptors) { + DWARFFormValue Value(Descriptor.Form); + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + switch (Descriptor.Type) { + case DW_LNCT_path: + FileEntry.Name = Value.getAsCString().getValue(); + break; + case DW_LNCT_directory_index: + FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_timestamp: + FileEntry.ModTime = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_size: + FileEntry.Length = Value.getAsUnsignedConstant().getValue(); + break; + // FIXME: Add MD5 + default: + break; + } + } + FileNames.push_back(FileEntry); + } + return true; +} + bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, uint32_t *OffsetPtr) { const uint64_t PrologueOffset = *OffsetPtr; @@ -90,6 +220,11 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, if (Version < 2) return false; + if (Version >= 5) { + AddressSize = DebugLineData.getU8(OffsetPtr); + SegSelectorSize = DebugLineData.getU8(OffsetPtr); + } + PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; MinInstLength = DebugLineData.getU8(OffsetPtr); @@ -106,27 +241,18 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, StandardOpcodeLengths.push_back(OpLen); } - while (*OffsetPtr < EndPrologueOffset) { - const char *S = DebugLineData.getCStr(OffsetPtr); - if (S && S[0]) - IncludeDirectories.push_back(S); - else - break; - } - - while (*OffsetPtr < EndPrologueOffset) { - const char *Name = DebugLineData.getCStr(OffsetPtr); - if (Name && Name[0]) { - FileNameEntry FileEntry; - FileEntry.Name = Name; - FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); - FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); - FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); - FileNames.push_back(FileEntry); - } else { - break; + if (Version >= 5) { + if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames)) { + fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64 "\n", PrologueOffset, (uint64_t)*OffsetPtr); + return false; } - } + } else + parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames); if (*OffsetPtr != EndPrologueOffset) { fprintf(stderr, @@ -161,6 +287,12 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { EpilogueBegin = false; } +void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) { + OS << "Address Line Column File ISA Discriminator Flags\n" + << "------------------ ------ ------ ------ --- ------------- " + "-------------\n"; +} + void DWARFDebugLine::Row::dump(raw_ostream &OS) const { OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u %13u ", File, Isa, Discriminator) @@ -187,9 +319,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { OS << '\n'; if (!Rows.empty()) { - OS << "Address Line Column File ISA Discriminator Flags\n" - << "------------------ ------ ------ ------ --- ------------- " - "-------------\n"; + Row::dumpTableHeader(OS); for (const Row &R : Rows) { R.dump(OS); } @@ -637,7 +767,7 @@ bool 
DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; - const char *FileName = Entry.Name; + StringRef FileName = Entry.Name; if (Kind != FileLineInfoKind::AbsoluteFilePath || sys::path::is_absolute(FileName)) { Result = FileName; @@ -646,7 +776,7 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; uint64_t IncludeDirIndex = Entry.DirIdx; - const char *IncludeDir = ""; + StringRef IncludeDir; // Be defensive about the contents of Entry. if (IncludeDirIndex > 0 && IncludeDirIndex <= Prologue.IncludeDirectories.size()) diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp new file mode 100644 index 000000000000..9494e876da15 --- /dev/null +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -0,0 +1,277 @@ +//===- DWARFVerifier.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +#include <set> +#include <vector> + +using namespace llvm; +using namespace dwarf; +using namespace object; + +void DWARFVerifier::verifyDebugInfoAttribute(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Attr = AttrValue.Attr; + switch (Attr) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + + default: + break; + } +} + +void DWARFVerifier::verifyDebugInfoForm(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Form = AttrValue.Value.getForm(); + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. 
+ Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + ++NumDebugInfoErrors; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if (*RefVal >= DCtx.getInfoSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + } + default: + break; + } +} + +void DWARFVerifier::veifyDebugInfoReferences() { + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair : ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + ++NumDebugInfoErrors; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". Offset is in between DIEs:\n"; + for (auto Offset : Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } +} + +bool DWARFVerifier::handleDebugInfo() { + NumDebugInfoErrors = 0; + OS << "Verifying .debug_info...\n"; + for (const auto &CU : DCtx.compile_units()) { + unsigned NumDies = CU->getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = CU->getDIEAtIndex(I); + const auto Tag = Die.getTag(); + if (Tag == DW_TAG_null) + continue; + for (auto AttrValue : Die.attributes()) { + verifyDebugInfoAttribute(Die, AttrValue); + verifyDebugInfoForm(Die, AttrValue); + } + } + } + veifyDebugInfoReferences(); + return NumDebugInfoErrors == 0; +} + +void DWARFVerifier::verifyDebugLineStmtOffsets() { + std::map<uint64_t, DWARFDie> StmtListToDie; + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. 
+ auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list)); + if (!StmtSectionOffset) + continue; + const uint32_t LineTableOffset = *StmtSectionOffset; + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (LineTableOffset < DCtx.getLineSection().Data.size()) { + if (!LineTable) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + Die.dump(OS, 0); + OS << '\n'; + continue; + } + } else { + // Make sure we don't get a valid line table back if the offset is wrong. + assert(LineTable == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + ++NumDebugLineErrors; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, Die.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + Die.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. + continue; + } + StmtListToDie[LineTableOffset] = Die; + } +} + +void DWARFVerifier::verifyDebugLineRows() { + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + // If there is no line table we will have created an error in the + // .debug_info verifier or in verifyDebugLineStmtOffsets(). + if (!LineTable) + continue; + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } + } +} + +bool DWARFVerifier::handleDebugLine() { + NumDebugLineErrors = 0; + OS << "Verifying .debug_line...\n"; + verifyDebugLineStmtOffsets(); + verifyDebugLineRows(); + return NumDebugLineErrors == 0; +} diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index bd35efb51c74..e1753018c7df 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -48,11 +48,11 @@ add_pdb_impl_folder(Native Native/NativeSession.cpp Native/PDBFile.cpp Native/PDBFileBuilder.cpp + Native/PDBStringTable.cpp + Native/PDBStringTableBuilder.cpp Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp Native/RawError.cpp - Native/StringTable.cpp - Native/StringTableBuilder.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp Native/TpiStream.cpp diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 
f994b4538ef0..867864e47dce 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -89,6 +89,14 @@ uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const { return alignTo(L + M + O, sizeof(uint32_t)); } +template <typename T> struct Foo { + explicit Foo(T &&Answer) : Answer(Answer) {} + + T Answer; +}; + +template <typename T> Foo<T> makeFoo(T &&t) { return Foo<T>(std::move(t)); } + void DbiModuleDescriptorBuilder::finalize() { Layout.FileNameOffs = 0; // TODO: Fix this Layout.Flags = 0; // TODO: Fix this diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 4802cc6e8197..db703809f7c9 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -146,7 +146,7 @@ Error DbiStream::reload() { if (ECSubstream.getLength() > 0) { BinaryStreamReader ECReader(ECSubstream); - if (auto EC = ECNames.load(ECReader)) + if (auto EC = ECNames.reload(ECReader)) return EC; } diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 943e7fa13ab7..859295d2c7d3 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -15,9 +15,9 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/BinaryStream.h" @@ -337,8 +337,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { return *Symbols; } -Expected<StringTable &> PDBFile::getStringTable() { - if (!Strings || !StringTableStream) { +Expected<PDBStringTable &> PDBFile::getStringTable() { + if (!Strings) { auto IS = getPDBInfoStream(); if (!IS) return IS.takeError(); @@ -350,12 +350,13 @@ Expected<StringTable &> PDBFile::getStringTable() { if (!NS) return NS.takeError(); + auto N = llvm::make_unique<PDBStringTable>(); BinaryStreamReader Reader(**NS); - auto N = llvm::make_unique<StringTable>(); - if (auto EC = N->load(Reader)) + if (auto EC = N->reload(Reader)) return std::move(EC); - Strings = std::move(N); + assert(Reader.bytesRemaining() == 0); StringTableStream = std::move(*NS); + Strings = std::move(N); } return *Strings; } @@ -389,7 +390,7 @@ bool PDBFile::hasPDBSymbolStream() { bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } -bool PDBFile::hasStringTable() { +bool PDBFile::hasPDBStringTable() { auto IS = getPDBInfoStream(); if (!IS) return false; diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index b3c84903bc7e..4dd965c69071 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -17,8 +17,8 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include 
"llvm/Support/BinaryStream.h" @@ -67,7 +67,9 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { return *Ipi; } -StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { + return Strings; +} Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); @@ -78,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { - uint32_t StringTableSize = Strings.finalize(); + uint32_t StringsLen = Strings.calculateSerializedSize(); - if (auto EC = addNamedStream("/names", StringTableSize)) + if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); @@ -107,6 +109,13 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { return Msf->build(); } +Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { + uint32_t SN = 0; + if (!NamedStreams.get(Name, SN)) + return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + return SN; +} + Error PDBFileBuilder::commit(StringRef Filename) { auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) @@ -144,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } - uint32_t StringTableStreamNo = 0; - if (!NamedStreams.get("/names", StringTableStreamNo)) - return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + auto ExpectedSN = getNamedStreamIndex("/names"); + if (!ExpectedSN) + return ExpectedSN.takeError(); auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - StringTableStreamNo); + *ExpectedSN); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp new file mode 100644 index 000000000000..e84573fe07b8 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -0,0 +1,134 @@ +//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getNameCount() const { return NameCount; } +uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } +uint32_t PDBStringTable::getSignature() const { return Header->Signature; } + +Error PDBStringTable::readHeader(BinaryStreamReader &Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + if (Header->Signature != PDBStringTableSignature) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (Header->HashVersion != 1 && Header->HashVersion != 2) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported hash version"); + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { + BinaryStreamRef Stream; + if (auto EC = Reader.readStreamRef(Stream)) + return EC; + + if (auto EC = Strings.initialize(Stream)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { + const support::ulittle32_t *HashCount; + if (auto EC = Reader.readObject(HashCount)) + return EC; + + if (auto EC = Reader.readArray(IDs, *HashCount)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read bucket array")); + } + + return Error::success(); +} + +Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) { + if (auto EC = Reader.readInteger(NameCount)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::reload(BinaryStreamReader &Reader) { + + BinaryStreamReader SectionReader; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader)); + if (auto EC = readHeader(SectionReader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize); + if (auto EC = readStrings(SectionReader)) + return EC; + + // We don't know how long the hash table is until we parse it, so let the + // function responsible for doing that figure it out. + if (auto EC = readHashTable(Reader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t)); + if (auto EC = readEpilogue(SectionReader)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Expected<StringRef> PDBStringTable::getStringForID(uint32_t ID) const { + return Strings.getString(ID); +} + +Expected<uint32_t> PDBStringTable::getIDForString(StringRef Str) const { + uint32_t Hash = + (Header->HashVersion == 1) ? 
hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + auto ExpectedStr = getStringForID(ID); + if (!ExpectedStr) + return ExpectedStr.takeError(); + + if (*ExpectedStr == Str) + return ID; + } + return make_error<RawError>(raw_error_code::no_entry); +} + +FixedStreamArray<support::ulittle32_t> PDBStringTable::name_ids() const { + return IDs; +} diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp new file mode 100644 index 000000000000..a472181a4895 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -0,0 +1,133 @@ +//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t PDBStringTableBuilder::insert(StringRef S) { + return Strings.insert(S); +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t PDBStringTableBuilder::calculateHashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); + + return Size; +} + +uint32_t PDBStringTableBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(PDBStringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += calculateHashTableSize(); + Size += sizeof(uint32_t); // The /names stream ends with the string count. + return Size; +} + +Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { + // Write a header + PDBStringTableHeader H; + H.Signature = PDBStringTableSignature; + H.HashVersion = 1; + H.ByteSize = Strings.calculateSerializedSize(); + if (auto EC = Writer.writeObject(H)) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { + // Write a hash table. 
+ uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector<ulittle32_t> Buckets(BucketCount); + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter SectionWriter; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); + if (auto EC = writeHeader(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = + Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); + if (auto EC = writeHashTable(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); + if (auto EC = writeEpilogue(SectionWriter)) + return EC; + + return Error::success(); +} diff --git a/lib/DebugInfo/PDB/Native/StringTable.cpp b/lib/DebugInfo/PDB/Native/StringTable.cpp deleted file mode 100644 index 7e28389b8383..000000000000 --- a/lib/DebugInfo/PDB/Native/StringTable.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTable.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::pdb; - -StringTable::StringTable() {} - -Error StringTable::load(BinaryStreamReader &Stream) { - ByteSize = Stream.getLength(); - - const StringTableHeader *H; - if (auto EC = Stream.readObject(H)) - return EC; - - if (H->Signature != StringTableSignature) - return make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table signature"); - if (H->HashVersion != 1 && H->HashVersion != 2) - return make_error<RawError>(raw_error_code::corrupt_file, - "Unsupported hash version"); - - Signature = H->Signature; - HashVersion = H->HashVersion; - if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table byte length")); - - const support::ulittle32_t *HashCount; - if (auto EC = Stream.readObject(HashCount)) - return EC; - - if (auto EC = Stream.readArray(IDs, *HashCount)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Could not read bucket array")); - - if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) - return make_error<RawError>(raw_error_code::corrupt_file, - "Missing name count"); - - if (auto EC = Stream.readInteger(NameCount)) - return EC; - - if (Stream.bytesRemaining() > 0) - return make_error<RawError>(raw_error_code::stream_too_long, - "Unexpected bytes found in string table"); - - return Error::success(); -} - -uint32_t StringTable::getByteSize() const { - return ByteSize; -} - -StringRef StringTable::getStringForID(uint32_t ID) const { - if (ID == IDs[0]) - return StringRef(); - - // NamesBuffer is a buffer of null terminated strings back to back. ID is - // the starting offset of the string we're looking for. So just seek into - // the desired offset and a read a null terminated stream from that offset. - StringRef Result; - BinaryStreamReader NameReader(NamesBuffer); - NameReader.setOffset(ID); - if (auto EC = NameReader.readCString(Result)) - consumeError(std::move(EC)); - return Result; -} - -uint32_t StringTable::getIDForString(StringRef Str) const { - uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); - size_t Count = IDs.size(); - uint32_t Start = Hash % Count; - for (size_t I = 0; I < Count; ++I) { - // The hash is just a starting point for the search, but if it - // doesn't work we should find the string no matter what, because - // we iterate the entire array. - uint32_t Index = (Start + I) % Count; - - uint32_t ID = IDs[Index]; - StringRef S = getStringForID(ID); - if (S == Str) - return ID; - } - // IDs[0] contains the ID of the "invalid" entry. 
- return IDs[0]; -} - -FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const { - return IDs; -} diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp deleted file mode 100644 index 40dc8e1bfcb1..000000000000 --- a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp +++ /dev/null @@ -1,108 +0,0 @@ -//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::support::endian; -using namespace llvm::pdb; - -uint32_t StringTableBuilder::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -uint32_t StringTableBuilder::getStringIndex(StringRef S) { - auto Iter = Strings.find(S); - assert(Iter != Strings.end()); - return Iter->second; -} - -static uint32_t computeBucketCount(uint32_t NumStrings) { - // The /names stream is basically an on-disk open-addressing hash table. - // Hash collisions are resolved by linear probing. We cannot make - // utilization 100% because it will make the linear probing extremely - // slow. But lower utilization wastes disk space. As a reasonable - // load factor, we choose 80%. We need +1 because slot 0 is reserved. - return (NumStrings + 1) * 1.25; -} - -uint32_t StringTableBuilder::finalize() { - uint32_t Size = 0; - Size += sizeof(StringTableHeader); - Size += StringSize; - Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. - - uint32_t BucketCount = computeBucketCount(Strings.size()); - Size += BucketCount * sizeof(uint32_t); - - Size += - sizeof(uint32_t); // The /names stream ends with the number of strings. - return Size; -} - -Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const { - // Write a header - StringTableHeader H; - H.Signature = StringTableSignature; - H.HashVersion = 1; - H.ByteSize = StringSize; - if (auto EC = Writer.writeObject(H)) - return EC; - - // Write a string table. - uint32_t StringStart = Writer.getOffset(); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - Writer.setOffset(StringStart + Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - } - Writer.setOffset(StringStart + StringSize); - - // Write a hash table. 
- uint32_t BucketCount = computeBucketCount(Strings.size()); - if (auto EC = Writer.writeInteger(BucketCount)) - return EC; - std::vector<ulittle32_t> Buckets(BucketCount); - - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - uint32_t Hash = hashStringV1(S); - - for (uint32_t I = 0; I != BucketCount; ++I) { - uint32_t Slot = (Hash + I) % BucketCount; - if (Slot == 0) - continue; // Skip reserved slot - if (Buckets[Slot] != 0) - continue; - Buckets[Slot] = Offset; - break; - } - } - - if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) - return EC; - if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size()))) - return EC; - return Error::success(); -} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f780137d0874..50f63fb8dd39 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -819,6 +819,34 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend) { + bool isBE = Arch == Triple::bpfeb; + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_BPF_NONE: + break; + case ELF::R_BPF_64_64: { + write(isBE, Section.getAddressWithOffset(Offset), Value + Addend); + DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + case ELF::R_BPF_64_32: { + Value += Addend; + assert(Value <= UINT32_MAX); + write(isBE, Section.getAddressWithOffset(Offset), static_cast<uint32_t>(Value)); + DEBUG(dbgs() << "Writing " << format("%p", Value) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + } +} + // The target location for the relocation is described by RE.SectionID and // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each // SectionEntry has three members describing its location. 
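The R_BPF_64_32 case added above folds the addend into the resolved symbol value, checks that the result fits in 32 bits, and stores it at the relocation target in the module's byte order. Below is a minimal, self-contained C++ sketch of that store; the helper name patchWord32 and its parameters are illustrative only and are not part of the RuntimeDyldELF interface shown in the patch.

#include <cassert>
#include <cstdint>
#include <cstring>

// Store (Value + Addend) as a 32-bit word at Target in the requested byte
// order, mirroring the R_BPF_64_32 handling sketched from the hunk above.
static void patchWord32(uint8_t *Target, uint64_t Value, int64_t Addend,
                        bool IsBigEndian) {
  uint64_t Result = Value + Addend;
  assert(Result <= UINT32_MAX && "R_BPF_64_32 value must fit in 32 bits");
  uint32_t Word = static_cast<uint32_t>(Result);
  uint8_t Bytes[4];
  for (int I = 0; I < 4; ++I) {
    unsigned Shift = IsBigEndian ? 8u * (3 - I) : 8u * I;
    Bytes[I] = static_cast<uint8_t>((Word >> Shift) & 0xff);
  }
  std::memcpy(Target, Bytes, sizeof(Bytes));
}

A little-endian BPF target (Triple::bpfel) would perform the same store with IsBigEndian set to false; the 64-bit R_BPF_64_64 case differs only in writing the full, untruncated value.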
@@ -879,6 +907,10 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, case Triple::systemz: resolveSystemZRelocation(Section, Offset, Value, Type, Addend); break; + case Triple::bpfel: + case Triple::bpfeb: + resolveBPFRelocation(Section, Offset, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 498979705b77..84dd810101f3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -58,6 +58,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + unsigned getMaxStubSize() override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 62f127bd02e0..3b1140ab542c 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -936,7 +936,9 @@ AttributeList AttributeList::get(LLVMContext &C, AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (hasAttribute(Index, Kind)) return *this; - return addAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, @@ -944,7 +946,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, StringRef Value) const { AttrBuilder B; B.addAttribute(Kind, Value); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttribute(LLVMContext &C, @@ -978,14 +980,6 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - if (!pImpl) return Attrs; - if (!Attrs.pImpl) return *this; - - return addAttributes(C, Index, Attrs.getAttributes(Index)); -} - -AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, const AttrBuilder &B) const { if (!B.hasAttributes()) return *this; @@ -1034,18 +1028,17 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); -} - -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index))); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, @@ -1103,7 +1096,7 @@ AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C, uint64_t Bytes) const 
{ AttrBuilder B; B.addDereferenceableAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1111,7 +1104,7 @@ AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index, uint64_t Bytes) const { AttrBuilder B; B.addDereferenceableOrNullAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1120,7 +1113,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, const Optional<unsigned> &NumElemsArg) { AttrBuilder B; B.addAllocSizeAttr(ElemSizeArg, NumElemsArg); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } //===----------------------------------------------------------------------===// @@ -1130,7 +1123,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); } AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const { - return getAttributes(ArgNo + 1); + return getAttributes(ArgNo + FirstArgIndex); } AttributeSet AttributeList::getRetAttributes() const { @@ -1196,7 +1189,7 @@ unsigned AttributeList::getRetAlignment() const { } unsigned AttributeList::getParamAlignment(unsigned ArgNo) const { - return getAttributes(ArgNo + 1).getAlignment(); + return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } unsigned AttributeList::getStackAlignment(unsigned Index) const { @@ -1610,12 +1603,10 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { // If upgrading the SSP attribute, clear out the old SSP Attributes first. // Having multiple SSP attributes doesn't actually hurt, but it adds useless // clutter to the IR. 
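The Attributes.cpp hunks above drop the AttributeList-taking overloads of addAttributes/removeAttributes and route everything through a const AttrBuilder & parameter instead. A minimal caller-side sketch of the resulting pattern, assuming the LLVM headers of this revision; the helper name and the NoAlias attribute are illustrative, not part of the patch:

// Hedged sketch (not from the patch): build an AttrBuilder and hand it to
// AttributeList::addAttributes directly, rather than wrapping it in a
// temporary AttributeList via AttributeList::get() as the old code did.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static AttributeList addNoAliasToReturn(LLVMContext &Ctx, AttributeList AL) {
  AttrBuilder B;
  B.addAttribute(Attribute::NoAlias);
  // Matches the signature kept by the hunks above:
  // AttributeList addAttributes(LLVMContext &, unsigned, const AttrBuilder &)
  return AL.addAttributes(Ctx, AttributeList::ReturnIndex, B);
}

Function::addAttributes and Function::removeAttributes pick up the same const AttrBuilder & signature further down in this import.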
- AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeList OldSSPAttr = - AttributeList::get(Caller.getContext(), AttributeList::FunctionIndex, B); + AttrBuilder OldSSPAttr; + OldSSPAttr.addAttribute(Attribute::StackProtect) + .addAttribute(Attribute::StackProtectStrong) + .addAttribute(Attribute::StackProtectReq); if (Callee.hasFnAttribute(Attribute::StackProtectReq)) { Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr); diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 2897434a2b8d..8bcba7672315 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -467,6 +467,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + // Renaming gather/scatter intrinsics with no address space overloading + // to the new overload which includes an address space + if (Name.startswith("masked.gather.")) { + Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_gather, Tys); + return true; + } + } + if (Name.startswith("masked.scatter.")) { + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {Args[0], Args[1]}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_scatter, Tys); + return true; + } + } break; } case 'n': { @@ -2072,7 +2093,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::masked_load: - case Intrinsic::masked_store: { + case Intrinsic::masked_store: + case Intrinsic::masked_gather: + case Intrinsic::masked_scatter: { SmallVector<Value *, 4> Args(CI->arg_operands().begin(), CI->arg_operands().end()); NewCall = Builder.CreateCall(NewFn, Args); diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index fc61ba7439ba..58c060550322 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -90,13 +90,15 @@ unsigned Argument::getParamAlignment() const { uint64_t Argument::getDereferenceableBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableBytes(getArgNo()+1); + return getParent()->getDereferenceableBytes(getArgNo() + + AttributeList::FirstArgIndex); } uint64_t Argument::getDereferenceableOrNullBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableOrNullBytes(getArgNo()+1); + return getParent()->getDereferenceableOrNullBytes( + getArgNo() + AttributeList::FirstArgIndex); } bool Argument::hasNestAttr() const { @@ -139,20 +141,21 @@ bool Argument::onlyReadsMemory() const { void Argument::addAttrs(AttrBuilder &B) { AttributeList AL = getParent()->getAttributes(); - AL = AL.addAttributes(Parent->getContext(), getArgNo() + 1, B); + AL = AL.addAttributes(Parent->getContext(), + getArgNo() + AttributeList::FirstArgIndex, B); getParent()->setAttributes(AL); } void Argument::addAttr(Attribute::AttrKind Kind) { - getParent()->addAttribute(getArgNo() + 1, Kind); + getParent()->addAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); } void Argument::addAttr(Attribute Attr) { - getParent()->addAttribute(getArgNo() + 1, Attr); + getParent()->addAttribute(getArgNo() + 
AttributeList::FirstArgIndex, Attr); } void Argument::removeAttr(Attribute::AttrKind Kind) { - getParent()->removeAttribute(getArgNo() + 1, Kind); + getParent()->removeAttribute(getArgNo() + AttributeList::FirstArgIndex, Kind); } bool Argument::hasAttribute(Attribute::AttrKind Kind) const { @@ -328,7 +331,7 @@ void Function::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } -void Function::addAttributes(unsigned i, AttributeList Attrs) { +void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.addAttributes(getContext(), i, Attrs); setAttributes(PAL); @@ -346,7 +349,7 @@ void Function::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } -void Function::removeAttributes(unsigned i, AttributeList Attrs) { +void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttributes(getContext(), i, Attrs); setAttributes(PAL); @@ -574,13 +577,12 @@ enum IIT_Info { IIT_SAME_VEC_WIDTH_ARG = 31, IIT_PTR_TO_ARG = 32, IIT_PTR_TO_ELT = 33, - IIT_VEC_OF_PTRS_TO_ELT = 34, + IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, IIT_V1024 = 37 }; - static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) { IIT_Info Info = IIT_Info(Infos[NextElt++]); @@ -716,10 +718,11 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo)); return; } - case IIT_VEC_OF_PTRS_TO_ELT: { - unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); - OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt, - ArgInfo)); + case IIT_VEC_OF_ANYPTRS_TO_ELT: { + unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + unsigned short RefNo = (NextElt == Infos.size() ? 
0 : Infos[NextElt++]); + OutputTable.push_back( + IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo)); return; } case IIT_EMPTYSTRUCT: @@ -808,7 +811,6 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, Elts[i] = DecodeFixedType(Infos, Tys, Context); return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements)); } - case IITDescriptor::Argument: return Tys[D.getArgumentNumber()]; case IITDescriptor::ExtendArgument: { @@ -850,15 +852,9 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, Type *EltTy = VTy->getVectorElementType(); return PointerType::getUnqual(EltTy); } - case IITDescriptor::VecOfPtrsToElt: { - Type *Ty = Tys[D.getArgumentNumber()]; - VectorType *VTy = dyn_cast<VectorType>(Ty); - if (!VTy) - llvm_unreachable("Expected an argument of Vector Type"); - Type *EltTy = VTy->getVectorElementType(); - return VectorType::get(PointerType::getUnqual(EltTy), - VTy->getNumElements()); - } + case IITDescriptor::VecOfAnyPtrsToElt: + // Return the overloaded type (which determines the pointers address space) + return Tys[D.getOverloadArgNumber()]; } llvm_unreachable("unhandled"); } @@ -1054,11 +1050,22 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> return (!ThisArgType || !ReferenceType || ThisArgType->getElementType() != ReferenceType->getElementType()); } - case IITDescriptor::VecOfPtrsToElt: { - if (D.getArgumentNumber() >= ArgTys.size()) + case IITDescriptor::VecOfAnyPtrsToElt: { + unsigned RefArgNumber = D.getRefArgNumber(); + + // This may only be used when referring to a previous argument. + if (RefArgNumber >= ArgTys.size()) return true; - VectorType * ReferenceType = - dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]); + + // Record the overloaded type + assert(D.getOverloadArgNumber() == ArgTys.size() && + "Table consistency error"); + ArgTys.push_back(Ty); + + // Verify the overloaded type "matches" the Ref type. + // i.e. Ty is a vector with the same width as Ref. + // Composed of pointers to the same element type as Ref. + VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]); VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty); if (!ThisArgVecTy || !ReferenceType || (ReferenceType->getVectorNumElements() != diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index fd5ae71a2f3c..e265a823687f 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -293,11 +293,13 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align, Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name); + return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes, + Name); } /// \brief Create a call to a Masked Scatter intrinsic. @@ -323,11 +325,13 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs, if (!Mask) Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + + Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. 
- return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy }); + return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes); } template <typename T0, typename T1, typename T2, typename T3> diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 59500992abb4..a60cc375d568 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -335,12 +335,12 @@ Value *CallInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } @@ -356,6 +356,10 @@ void CallInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void CallInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -368,6 +372,10 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void CallInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); @@ -501,8 +509,8 @@ static Instruction *createMalloc(Instruction *InsertBefore, MCall->setTailCall(); if (Function *F = dyn_cast<Function>(MallocFunc)) { MCall->setCallingConv(F->getCallingConv()); - if (!F->doesNotAlias(AttributeList::ReturnIndex)) - F->setDoesNotAlias(AttributeList::ReturnIndex); + if (!F->returnDoesNotAlias()) + F->setReturnDoesNotAlias(); } assert(!MCall->getType()->isVoidTy() && "Malloc has void return type"); @@ -695,12 +703,12 @@ Value *InvokeInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } @@ -756,6 +764,10 @@ void InvokeInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void InvokeInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + addAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -768,6 +780,10 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void InvokeInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + removeAttribute(ArgNo + AttributeList::FirstArgIndex, Kind); +} + void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 7185736fa2e0..9db30da89ed0 100644 --- 
a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -52,12 +52,12 @@ class Value; struct DenseMapAPIntKeyInfo { static inline APInt getEmptyKey() { APInt V(nullptr, 0); - V.VAL = 0; + V.U.VAL = 0; return V; } static inline APInt getTombstoneKey() { APInt V(nullptr, 0); - V.VAL = 1; + V.U.VAL = 1; return V; } static unsigned getHashValue(const APInt &Key) { diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 9f94264684f9..b685790910d0 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -286,6 +286,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); break; + case Triple::bpfel: + case Triple::bpfeb: + FDECFIEncoding = dwarf::DW_EH_PE_sdata8; + break; default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index b6c8cbee66df..fa81b28cd083 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -76,34 +76,31 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { void APInt::initSlowCase(uint64_t val, bool isSigned) { - VAL = 0; - pVal = getClearedMemory(getNumWords()); - pVal[0] = val; + U.pVal = getClearedMemory(getNumWords()); + U.pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = WORD_MAX; + U.pVal[i] = WORD_MAX; clearUnusedBits(); } void APInt::initSlowCase(const APInt& that) { - VAL = 0; - pVal = getMemory(getNumWords()); - memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); + U.pVal = getMemory(getNumWords()); + memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); } void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { assert(BitWidth && "Bitwidth too small"); assert(bigVal.data() && "Null pointer detected!"); if (isSingleWord()) - VAL = bigVal[0]; + U.VAL = bigVal[0]; else { // Get memory, cleared to 0 - VAL = 0; - pVal = getClearedMemory(getNumWords()); + U.pVal = getClearedMemory(getNumWords()); // Calculate the number of words to copy unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); // Copy the words from bigVal to pVal - memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); + memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); } // Make sure unused high bits are cleared clearUnusedBits(); @@ -120,7 +117,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) } APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) - : VAL(0), BitWidth(numbits) { + : BitWidth(numbits) { assert(BitWidth && "Bitwidth too small"); fromString(numbits, Str, radix); } @@ -133,25 +130,24 @@ void APInt::AssignSlowCase(const APInt& RHS) { if (BitWidth == RHS.getBitWidth()) { // assume same bit-width single-word case is already handled assert(!isSingleWord()); - memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); + memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); return; } if (isSingleWord()) { // assume case where both are single words is already handled assert(!RHS.isSingleWord()); - VAL = 0; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + U.pVal = getMemory(RHS.getNumWords()); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); } else if (getNumWords() == RHS.getNumWords()) - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); else if (RHS.isSingleWord()) { - 
delete [] pVal; - VAL = RHS.VAL; + delete [] U.pVal; + U.VAL = RHS.U.VAL; } else { - delete [] pVal; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + delete [] U.pVal; + U.pVal = getMemory(RHS.getNumWords()); + memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE); } BitWidth = RHS.BitWidth; clearUnusedBits(); @@ -162,30 +158,30 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); if (isSingleWord()) { - ID.AddInteger(VAL); + ID.AddInteger(U.VAL); return; } unsigned NumWords = getNumWords(); for (unsigned i = 0; i < NumWords; ++i) - ID.AddInteger(pVal[i]); + ID.AddInteger(U.pVal[i]); } /// @brief Prefix increment operator. Increments the APInt by one. APInt& APInt::operator++() { if (isSingleWord()) - ++VAL; + ++U.VAL; else - tcIncrement(pVal, getNumWords()); + tcIncrement(U.pVal, getNumWords()); return clearUnusedBits(); } /// @brief Prefix decrement operator. Decrements the APInt by one. APInt& APInt::operator--() { if (isSingleWord()) - --VAL; + --U.VAL; else - tcDecrement(pVal, getNumWords()); + tcDecrement(U.pVal, getNumWords()); return clearUnusedBits(); } @@ -195,17 +191,17 @@ APInt& APInt::operator--() { APInt& APInt::operator+=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL += RHS.VAL; + U.VAL += RHS.U.VAL; else - tcAdd(pVal, RHS.pVal, 0, getNumWords()); + tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator+=(uint64_t RHS) { if (isSingleWord()) - VAL += RHS; + U.VAL += RHS; else - tcAddPart(pVal, RHS, getNumWords()); + tcAddPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } @@ -215,17 +211,17 @@ APInt& APInt::operator+=(uint64_t RHS) { APInt& APInt::operator-=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL -= RHS.VAL; + U.VAL -= RHS.U.VAL; else - tcSubtract(pVal, RHS.pVal, 0, getNumWords()); + tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator-=(uint64_t RHS) { if (isSingleWord()) - VAL -= RHS; + U.VAL -= RHS; else - tcSubtractPart(pVal, RHS, getNumWords()); + tcSubtractPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } @@ -300,7 +296,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], APInt& APInt::operator*=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { - VAL *= RHS.VAL; + U.VAL *= RHS.U.VAL; clearUnusedBits(); return *this; } @@ -326,12 +322,12 @@ APInt& APInt::operator*=(const APInt& RHS) { uint64_t *dest = getMemory(destWords); // Perform the long multiply - mul(dest, pVal, lhsWords, RHS.pVal, rhsWords); + mul(dest, U.pVal, lhsWords, RHS.U.pVal, rhsWords); // Copy result back into *this clearAllBits(); unsigned wordsToCopy = destWords >= getNumWords() ? 
getNumWords() : destWords; - memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); + memcpy(U.pVal, dest, wordsToCopy * APINT_WORD_SIZE); clearUnusedBits(); // delete dest array and return @@ -340,43 +336,43 @@ APInt& APInt::operator*=(const APInt& RHS) { } void APInt::AndAssignSlowCase(const APInt& RHS) { - tcAnd(pVal, RHS.pVal, getNumWords()); + tcAnd(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::OrAssignSlowCase(const APInt& RHS) { - tcOr(pVal, RHS.pVal, getNumWords()); + tcOr(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::XorAssignSlowCase(const APInt& RHS) { - tcXor(pVal, RHS.pVal, getNumWords()); + tcXor(U.pVal, RHS.U.pVal, getNumWords()); } APInt APInt::operator*(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - return APInt(BitWidth, VAL * RHS.VAL); + return APInt(BitWidth, U.VAL * RHS.U.VAL); APInt Result(*this); Result *= RHS; return Result; } bool APInt::EqualSlowCase(const APInt& RHS) const { - return std::equal(pVal, pVal + getNumWords(), RHS.pVal); + return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); } int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL ? -1 : VAL > RHS.VAL; + return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; - return tcCompare(pVal, RHS.pVal, getNumWords()); + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { - int64_t lhsSext = SignExtend64(VAL, BitWidth); - int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); + int64_t lhsSext = SignExtend64(U.VAL, BitWidth); + int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } @@ -389,7 +385,7 @@ int APInt::compareSigned(const APInt& RHS) const { // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return tcCompare(pVal, RHS.pVal, getNumWords()); + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { @@ -409,19 +405,19 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { if (hiWord == loWord) loMask &= hiMask; else - pVal[hiWord] |= hiMask; + U.pVal[hiWord] |= hiMask; } // Apply the mask to the low word. - pVal[loWord] |= loMask; + U.pVal[loWord] |= loMask; // Fill any words between loWord and hiWord with all ones. for (unsigned word = loWord + 1; word < hiWord; ++word) - pVal[word] = WORD_MAX; + U.pVal[word] = WORD_MAX; } /// @brief Toggle every bit to its opposite value. void APInt::flipAllBitsSlowCase() { - tcComplement(pVal, getNumWords()); + tcComplement(U.pVal, getNumWords()); clearUnusedBits(); } @@ -448,8 +444,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Single word result can be done as a direct bitmask. if (isSingleWord()) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - VAL &= ~(mask << bitPosition); - VAL |= (subBits.VAL << bitPosition); + U.VAL &= ~(mask << bitPosition); + U.VAL |= (subBits.U.VAL << bitPosition); return; } @@ -460,8 +456,8 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Insertion within a single word can be done as a direct bitmask. 
if (loWord == hi1Word) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - pVal[loWord] &= ~(mask << loBit); - pVal[loWord] |= (subBits.VAL << loBit); + U.pVal[loWord] &= ~(mask << loBit); + U.pVal[loWord] |= (subBits.U.VAL << loBit); return; } @@ -469,15 +465,15 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { if (loBit == 0) { // Direct copy whole words. unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; - memcpy(pVal + loWord, subBits.getRawData(), + memcpy(U.pVal + loWord, subBits.getRawData(), numWholeSubWords * APINT_WORD_SIZE); // Mask+insert remaining bits. unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; if (remainingBits != 0) { uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); - pVal[hi1Word] &= ~mask; - pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); + U.pVal[hi1Word] &= ~mask; + U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); } return; } @@ -499,7 +495,7 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { "Illegal bit extraction"); if (isSingleWord()) - return APInt(numBits, VAL >> bitPosition); + return APInt(numBits, U.VAL >> bitPosition); unsigned loBit = whichBit(bitPosition); unsigned loWord = whichWord(bitPosition); @@ -507,12 +503,12 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { // Single word result extracting bits from a single word source. if (loWord == hiWord) - return APInt(numBits, pVal[loWord] >> loBit); + return APInt(numBits, U.pVal[loWord] >> loBit); // Extracting bits that start on a source word boundary can be done // as a fast memory copy. if (loBit == 0) - return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord)); + return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); // General case - shift + copy source words directly into place. APInt Result(numBits, 0); @@ -520,10 +516,10 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { unsigned NumDstWords = Result.getNumWords(); for (unsigned word = 0; word < NumDstWords; ++word) { - uint64_t w0 = pVal[loWord + word]; + uint64_t w0 = U.pVal[loWord + word]; uint64_t w1 = - (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0; - Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); + (loWord + word + 1) < NumSrcWords ? 
U.pVal[loWord + word + 1] : 0; + Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); } return Result.clearUnusedBits(); @@ -584,9 +580,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { hash_code llvm::hash_value(const APInt &Arg) { if (Arg.isSingleWord()) - return hash_combine(Arg.VAL); + return hash_combine(Arg.U.VAL); - return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); + return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); } bool APInt::isSplat(unsigned SplatSizeInBits) const { @@ -623,7 +619,7 @@ APInt APInt::getSplat(unsigned NewLen, const APInt &V) { unsigned APInt::countLeadingZerosSlowCase() const { unsigned Count = 0; for (int i = getNumWords()-1; i >= 0; --i) { - uint64_t V = pVal[i]; + uint64_t V = U.pVal[i]; if (V == 0) Count += APINT_BITS_PER_WORD; else { @@ -639,7 +635,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { unsigned APInt::countLeadingOnes() const { if (isSingleWord()) - return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth)); + return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; @@ -650,13 +646,13 @@ unsigned APInt::countLeadingOnes() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); + unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == WORD_MAX) + if (U.pVal[i] == WORD_MAX) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingOnes(pVal[i]); + Count += llvm::countLeadingOnes(U.pVal[i]); break; } } @@ -666,23 +662,23 @@ unsigned APInt::countLeadingOnes() const { unsigned APInt::countTrailingZeros() const { if (isSingleWord()) - return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); + return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == 0; ++i) + for (; i < getNumWords() && U.pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingZeros(pVal[i]); + Count += llvm::countTrailingZeros(U.pVal[i]); return std::min(Count, BitWidth); } unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i) + for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingOnes(pVal[i]); + Count += llvm::countTrailingOnes(U.pVal[i]); assert(Count <= BitWidth); return Count; } @@ -690,13 +686,13 @@ unsigned APInt::countTrailingOnesSlowCase() const { unsigned APInt::countPopulationSlowCase() const { unsigned Count = 0; for (unsigned i = 0; i < getNumWords(); ++i) - Count += llvm::countPopulation(pVal[i]); + Count += llvm::countPopulation(U.pVal[i]); return Count; } bool APInt::intersectsSlowCase(const APInt &RHS) const { for (unsigned i = 0, e = getNumWords(); i != e; ++i) - if ((pVal[i] & RHS.pVal[i]) != 0) + if ((U.pVal[i] & RHS.U.pVal[i]) != 0) return true; return false; @@ -704,7 +700,7 @@ bool APInt::intersectsSlowCase(const APInt &RHS) const { bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { for (unsigned i = 0, e = getNumWords(); i != e; ++i) - if ((pVal[i] & ~RHS.pVal[i]) != 0) + if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) return false; return true; @@ -713,22 +709,22 @@ bool 
APInt::isSubsetOfSlowCase(const APInt &RHS) const { APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) - return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); + return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); if (BitWidth == 32) - return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); + return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); if (BitWidth == 48) { - unsigned Tmp1 = unsigned(VAL >> 16); + unsigned Tmp1 = unsigned(U.VAL >> 16); Tmp1 = ByteSwap_32(Tmp1); - uint16_t Tmp2 = uint16_t(VAL); + uint16_t Tmp2 = uint16_t(U.VAL); Tmp2 = ByteSwap_16(Tmp2); return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); } if (BitWidth == 64) - return APInt(BitWidth, ByteSwap_64(VAL)); + return APInt(BitWidth, ByteSwap_64(U.VAL)); APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); for (unsigned I = 0, N = getNumWords(); I != N; ++I) - Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); + Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; @@ -739,13 +735,13 @@ APInt APInt::byteSwap() const { APInt APInt::reverseBits() const { switch (BitWidth) { case 64: - return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); case 32: - return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); case 16: - return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); case 8: - return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL)); + return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); default: break; } @@ -890,13 +886,13 @@ double APInt::roundToDouble(bool isSigned) const { uint64_t mantissa; unsigned hiWord = whichWord(n-1); if (hiWord == 0) { - mantissa = Tmp.pVal[0]; + mantissa = Tmp.U.pVal[0]; if (n > 52) mantissa >>= n - 52; // shift down, we want the top 52 bits. } else { assert(hiWord > 0 && "huh?"); - uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); - uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); + uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); mantissa = hibits | lobits; } @@ -923,12 +919,12 @@ APInt APInt::trunc(unsigned width) const { // Copy full words. unsigned i; for (i = 0; i != width / APINT_BITS_PER_WORD; i++) - Result.pVal[i] = pVal[i]; + Result.U.pVal[i] = U.pVal[i]; // Truncate and copy any partial word. unsigned bits = (0 - width) % APINT_BITS_PER_WORD; if (bits != 0) - Result.pVal[i] = pVal[i] << bits >> bits; + Result.U.pVal[i] = U.pVal[i] << bits >> bits; return Result; } @@ -938,20 +934,20 @@ APInt APInt::sext(unsigned Width) const { assert(Width > BitWidth && "Invalid APInt SignExtend request"); if (Width <= APINT_BITS_PER_WORD) - return APInt(Width, SignExtend64(VAL, BitWidth)); + return APInt(Width, SignExtend64(U.VAL, BitWidth)); APInt Result(getMemory(getNumWords(Width)), Width); // Copy words. - std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Sign extend the last word since there may be unused bits in the input. 
- Result.pVal[getNumWords() - 1] = - SignExtend64(Result.pVal[getNumWords() - 1], + Result.U.pVal[getNumWords() - 1] = + SignExtend64(Result.U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); // Fill with sign bits. - std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0, + std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); Result.clearUnusedBits(); return Result; @@ -962,15 +958,15 @@ APInt APInt::zext(unsigned width) const { assert(width > BitWidth && "Invalid APInt ZeroExtend request"); if (width <= APINT_BITS_PER_WORD) - return APInt(width, VAL); + return APInt(width, U.VAL); APInt Result(getMemory(getNumWords(width)), width); // Copy words. - std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Zero remaining words. - std::memset(Result.pVal + getNumWords(), 0, + std::memset(Result.U.pVal + getNumWords(), 0, (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); return Result; @@ -1027,28 +1023,28 @@ void APInt::ashrSlowCase(unsigned ShiftAmt) { unsigned WordsToMove = getNumWords() - WordShift; if (WordsToMove != 0) { // Sign extend the last word to fill in the unused bits. - pVal[getNumWords() - 1] = SignExtend64( - pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + U.pVal[getNumWords() - 1] = SignExtend64( + U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); // Fastpath for moving by whole words. if (BitShift == 0) { - std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); } else { // Move the words containing significant bits. for (unsigned i = 0; i != WordsToMove - 1; ++i) - pVal[i] = (pVal[i + WordShift] >> BitShift) | - (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); + U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | + (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); // Handle the last word which has no high bits to copy. - pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift; + U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; // Sign extend one more time. - pVal[WordsToMove - 1] = - SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); + U.pVal[WordsToMove - 1] = + SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); } } // Fill in the remainder based on the original sign. - std::memset(pVal + WordsToMove, Negative ? -1 : 0, + std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, WordShift * APINT_WORD_SIZE); clearUnusedBits(); } @@ -1062,7 +1058,7 @@ void APInt::lshrInPlace(const APInt &shiftAmt) { /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. void APInt::lshrSlowCase(unsigned ShiftAmt) { - tcShiftRight(pVal, getNumWords(), ShiftAmt); + tcShiftRight(U.pVal, getNumWords(), ShiftAmt); } /// Left-shift this APInt by shiftAmt. @@ -1074,7 +1070,7 @@ APInt &APInt::operator<<=(const APInt &shiftAmt) { } void APInt::shlSlowCase(unsigned ShiftAmt) { - tcShiftLeft(pVal, getNumWords(), ShiftAmt); + tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); clearUnusedBits(); } @@ -1137,7 +1133,7 @@ APInt APInt::sqrt() const { /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, /* 31 */ 6 }; - return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); + return APInt(BitWidth, results[ (isSingleWord() ? 
U.VAL : U.pVal[0]) ]); } // If the magnitude of the value fits in less than 52 bits (the precision of @@ -1146,7 +1142,8 @@ APInt APInt::sqrt() const { // This should be faster than the algorithm below. if (magnitude < 52) { return APInt(BitWidth, - uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); + uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL + : U.pVal[0]))))); } // Okay, all the short cuts are exhausted. We must compute it. The following @@ -1524,7 +1521,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the dividend memset(U, 0, (m+n+1)*sizeof(unsigned)); for (unsigned i = 0; i < lhsWords; ++i) { - uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); + uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.U.VAL : LHS.U.pVal[i]); U[i * 2] = (unsigned)(tmp & mask); U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); } @@ -1533,7 +1530,7 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Initialize the divisor memset(V, 0, (n)*sizeof(unsigned)); for (unsigned i = 0; i < rhsWords; ++i) { - uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); + uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.U.VAL : RHS.U.pVal[i]); V[i * 2] = (unsigned)(tmp & mask); V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); } @@ -1593,12 +1590,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Set up the Quotient value's memory. if (Quotient->BitWidth != LHS.BitWidth) { if (Quotient->isSingleWord()) - Quotient->VAL = 0; + Quotient->U.VAL = 0; else - delete [] Quotient->pVal; + delete [] Quotient->U.pVal; Quotient->BitWidth = LHS.BitWidth; if (!Quotient->isSingleWord()) - Quotient->pVal = getClearedMemory(Quotient->getNumWords()); + Quotient->U.pVal = getClearedMemory(Quotient->getNumWords()); } else Quotient->clearAllBits(); @@ -1610,13 +1607,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, uint64_t tmp = uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); if (Quotient->isSingleWord()) - Quotient->VAL = tmp; + Quotient->U.VAL = tmp; else - Quotient->pVal[0] = tmp; + Quotient->U.pVal[0] = tmp; } else { assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); for (unsigned i = 0; i < lhsWords; ++i) - Quotient->pVal[i] = + Quotient->U.pVal[i] = uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1626,12 +1623,12 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // Set up the Remainder value's memory. 
if (Remainder->BitWidth != RHS.BitWidth) { if (Remainder->isSingleWord()) - Remainder->VAL = 0; + Remainder->U.VAL = 0; else - delete [] Remainder->pVal; + delete [] Remainder->U.pVal; Remainder->BitWidth = RHS.BitWidth; if (!Remainder->isSingleWord()) - Remainder->pVal = getClearedMemory(Remainder->getNumWords()); + Remainder->U.pVal = getClearedMemory(Remainder->getNumWords()); } else Remainder->clearAllBits(); @@ -1641,13 +1638,13 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, uint64_t tmp = uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); if (Remainder->isSingleWord()) - Remainder->VAL = tmp; + Remainder->U.VAL = tmp; else - Remainder->pVal[0] = tmp; + Remainder->U.pVal[0] = tmp; } else { assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); for (unsigned i = 0; i < rhsWords; ++i) - Remainder->pVal[i] = + Remainder->U.pVal[i] = uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1666,8 +1663,8 @@ APInt APInt::udiv(const APInt& RHS) const { // First, deal with the easy case if (isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - return APInt(BitWidth, VAL / RHS.VAL); + assert(RHS.U.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, U.VAL / RHS.U.VAL); } // Get some facts about the LHS and RHS number of bits and words @@ -1689,7 +1686,7 @@ APInt APInt::udiv(const APInt& RHS) const { return APInt(BitWidth, 1); } else if (lhsWords == 1 && rhsWords == 1) { // All high words are zero, just use native divide - return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); + return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); } // We have to compute it the hard way. Invoke the Knuth divide algorithm. @@ -1712,8 +1709,8 @@ APInt APInt::sdiv(const APInt &RHS) const { APInt APInt::urem(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { - assert(RHS.VAL != 0 && "Remainder by zero?"); - return APInt(BitWidth, VAL % RHS.VAL); + assert(RHS.U.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, U.VAL % RHS.U.VAL); } // Get some facts about the LHS @@ -1737,7 +1734,7 @@ APInt APInt::urem(const APInt& RHS) const { return APInt(BitWidth, 0); } else if (lhsWords == 1) { // All high words are zero, just use native remainder - return APInt(BitWidth, pVal[0] % RHS.pVal[0]); + return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); } // We have to compute it the hard way. Invoke the Knuth divide algorithm. @@ -1763,9 +1760,9 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, // First, deal with the easy case if (LHS.isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - uint64_t QuotVal = LHS.VAL / RHS.VAL; - uint64_t RemVal = LHS.VAL % RHS.VAL; + assert(RHS.U.VAL != 0 && "Divide by zero?"); + uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; + uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; Quotient = APInt(LHS.BitWidth, QuotVal); Remainder = APInt(LHS.BitWidth, RemVal); return; @@ -1798,8 +1795,8 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, if (lhsWords == 1 && rhsWords == 1) { // There is only one word to consider so use the native versions. - uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; - uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0]; + uint64_t lhsValue = LHS.isSingleWord() ? LHS.U.VAL : LHS.U.pVal[0]; + uint64_t rhsValue = RHS.isSingleWord() ? 
RHS.U.VAL : RHS.U.pVal[0]; Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); return; @@ -1926,9 +1923,11 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); - // Allocate memory - if (!isSingleWord()) - pVal = getClearedMemory(getNumWords()); + // Allocate memory if needed + if (isSingleWord()) + U.VAL = 0; + else + U.pVal = getClearedMemory(getNumWords()); // Figure out if we can shift instead of multiply unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp index c7a2e0ddb179..702d98770e05 100644 --- a/lib/Support/BinaryStreamReader.cpp +++ b/lib/Support/BinaryStreamReader.cpp @@ -93,3 +93,16 @@ uint8_t BinaryStreamReader::peek() const { llvm::consumeError(std::move(EC)); return Buffer[0]; } + +std::pair<BinaryStreamReader, BinaryStreamReader> +BinaryStreamReader::split(uint32_t Off) const { + assert(getLength() >= Off); + + BinaryStreamRef First = Stream.drop_front(Offset); + + BinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamReader W1{First}; + BinaryStreamReader W2{Second}; + return std::make_pair(W1, W2); +}
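Stepping back to the APInt.cpp hunks above, which make up most of this stretch of the import: they are a mechanical rename in which the old VAL/pVal members are now reached through a single union member U (declared in the APInt header, which is not part of this excerpt). A standalone sketch of that storage scheme, with illustrative names rather than the real llvm::APInt layout:

// Hedged sketch of the union-based small/large storage the APInt hunks
// switch to; TinyAPInt and its members are illustrative names only.
#include <cstdint>

class TinyAPInt {
  unsigned BitWidth;
  union {
    uint64_t VAL;   // used when the value fits in one 64-bit word
    uint64_t *pVal; // otherwise a heap array of getNumWords() words
  } U;

  bool isSingleWord() const { return BitWidth <= 64; }
  unsigned getNumWords() const { return (BitWidth + 63) / 64; }

public:
  TinyAPInt(unsigned Bits, uint64_t V) : BitWidth(Bits) {
    if (isSingleWord()) {
      U.VAL = V;
    } else {
      U.pVal = new uint64_t[getNumWords()]();
      U.pVal[0] = V;
    }
  }
  TinyAPInt(const TinyAPInt &) = delete;
  TinyAPInt &operator=(const TinyAPInt &) = delete;
  ~TinyAPInt() {
    if (!isSingleWord())
      delete[] U.pVal;
  }

  // Every access in the patch now spells U.VAL / U.pVal instead of the
  // former bare VAL / pVal members.
  uint64_t word(unsigned i) const {
    return isSingleWord() ? U.VAL : U.pVal[i];
  }
};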
\ No newline at end of file diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp index d60b75642d0f..d78dbc68f593 100644 --- a/lib/Support/BinaryStreamWriter.cpp +++ b/lib/Support/BinaryStreamWriter.cpp @@ -59,6 +59,19 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) { return Error::success(); } +std::pair<BinaryStreamWriter, BinaryStreamWriter> +BinaryStreamWriter::split(uint32_t Off) const { + assert(getLength() >= Off); + + WritableBinaryStreamRef First = Stream.drop_front(Offset); + + WritableBinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamWriter W1{First}; + BinaryStreamWriter W2{Second}; + return std::make_pair(W1, W2); +} + Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 5d6d60a87fbf..53c10bcc562e 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -128,6 +128,16 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { return nullptr; } +StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const { + uint32_t Start = *OffsetPtr; + StringRef::size_type Pos = Data.find('\0', Start); + if (Pos != StringRef::npos) { + *OffsetPtr = Pos + 1; + return StringRef(Data.data() + Start, Pos - Start); + } + return StringRef(); +} + uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { uint64_t result = 0; if (Data.empty()) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 970ecfd7df90..6a0b64fb884d 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1363,6 +1363,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index f3a654d7d2bd..eb8108908ac5 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -459,7 +459,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("kfreebsd", Triple::KFreeBSD) .StartsWith("linux", Triple::Linux) .StartsWith("lv2", Triple::Lv2) - .StartsWith("macosx", Triple::MacOSX) + .StartsWith("macos", Triple::MacOSX) .StartsWith("netbsd", Triple::NetBSD) .StartsWith("openbsd", Triple::OpenBSD) .StartsWith("solaris", Triple::Solaris) @@ -984,6 +984,8 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor, StringRef OSTypeName = getOSTypeName(getOS()); if (OSName.startswith(OSTypeName)) OSName = OSName.substr(OSTypeName.size()); + else if (getOS() == MacOSX) + OSName.consume_front("macos"); parseVersionFromName(OSName, Major, Minor, Micro); } diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index b2f55a7e1e09..ff3e4c40e2c2 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -247,7 +247,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned i = 0; for (auto &Arg : F.args()) { ArgInfo OrigArg{VRegs[i], Arg.getType()}; - setArgFlags(OrigArg, i + 1, DL, F); + setArgFlags(OrigArg, i + 
AttributeList::FirstArgIndex, DL, F); bool Split = false; LLT Ty = MRI.getType(VRegs[i]); unsigned Dst = VRegs[i]; diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 4fb262c6277c..36dcc699d4ea 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -677,12 +677,19 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { } const Function &ContainingFunction = *I.getParent()->getParent(); + CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work // item ID intrinsics are not supported for these calling conventions. // Furthermore not all LDS is available for some of the stages. - if (AMDGPU::isShader(ContainingFunction.getCallingConv())) + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + break; + default: + DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); return; + } const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 005b74a68af3..46fd1f70ee99 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -577,6 +577,7 @@ def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, FeatureVFP2, diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index a8188411bf5f..9178c67afa6e 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -354,7 +354,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); - setArgFlags(AInfo, Idx + 1, DL, F); + setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); Idx++; } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index e9df9449103c..7f9fe55a5c38 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -740,7 +740,9 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(Offset); replaceDAGValue(Offset.getOperand(1), NewMulConst); + Offset = Handle.getValue(); ShAmt = PowerOfTwo; ShOpcVal = ARM_AM::lsl; } @@ -1420,7 +1422,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(OffReg); replaceDAGValue(OffReg.getOperand(1), NewMulConst); + OffReg = Handle.getValue(); ShAmt = PowerOfTwo; } } diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index 25232d2e47e1..c297865db820 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -228,9 +228,8 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, bool AVRFrameLowering::hasFP(const MachineFunction &MF) const { const AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>(); 
- // TODO: We do not always need a frame pointer. - // This can be optimised. - return true; + return (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas() || + FuncInfo->getHasStackArgs()); } bool AVRFrameLowering::spillCalleeSavedRegisters( diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 5cc7eaf8add3..2813e24d2ac7 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -65,12 +65,18 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AVR::SPH); Reserved.set(AVR::SP); - // Reserve the frame pointer registers r28 and r29 if the function requires one. - if (TFI->hasFP(MF)) { - Reserved.set(AVR::R28); - Reserved.set(AVR::R29); - Reserved.set(AVR::R29R28); - } + // We tenatively reserve the frame pointer register r29:r28 because the + // function may require one, but we cannot tell until register allocation + // is complete, which can be too late. + // + // Instead we just unconditionally reserve the Y register. + // + // TODO: Write a pass to enumerate functions which reserved the Y register + // but didn't end up needing a frame pointer. In these, we can + // convert one or two of the spills inside to use the Y register. + Reserved.set(AVR::R28); + Reserved.set(AVR::R29); + Reserved.set(AVR::R29R28); return Reserved; } diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 42ff9cc3d18a..b0b2644fffbe 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -307,7 +307,7 @@ public: bool iss31_1Imm() const { return true; } bool iss30_2Imm() const { return true; } bool iss29_3Imm() const { return true; } - bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); } + bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); } bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); } bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); } bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); } @@ -1292,13 +1292,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_iconst: { Inst.setOpcode(Hexagon::A2_addi); MCOperand Reg = Inst.getOperand(0); - MCOperand S16 = Inst.getOperand(1); - HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + MCOperand S27 = Inst.getOperand(1); + HexagonMCInstrInfo::setMustNotExtend(*S27.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S27.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); - Inst.addOperand(S16); + Inst.addOperand(S27); break; } case Hexagon::M4_mpyrr_addr: diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index c8483f7e6e76..bb5128e7500f 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -298,7 +298,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCOperand Reg = Inst.getOperand(0); MCOperand S16 = Inst.getOperand(1); HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S16.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 
e5eb059b566f..861af94f1e38 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1720,8 +1720,13 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); + unsigned Flags = + static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls() + ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended + : HexagonII::MO_GDPLT; + return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, - Hexagon::R0, HexagonII::MO_GDPLT); + Hexagon::R0, Flags); } // diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 7d1da5c3ba21..709d64585c0b 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -7,16 +7,6 @@ // //===----------------------------------------------------------------------===// -// Maintain list of valid subtargets for each instruction. -class SubTarget<bits<6> value> { - bits<6> Value = value; -} - -def HasAnySubT : SubTarget<0x3f>; // 111111 -def HasV5SubT : SubTarget<0x3e>; // 111110 -def HasV55SubT : SubTarget<0x3c>; // 111100 -def HasV60SubT : SubTarget<0x38>; // 111000 - // Addressing modes for load/store instructions class AddrModeType<bits<3> value> { bits<3> Value = value; @@ -131,12 +121,6 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<2> opExtentAlign = 0; let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending. - // If an instruction is valid on a subtarget, set the corresponding - // bit from validSubTargets. - // By default, instruction is valid on all subtargets. - SubTarget validSubTargets = HasAnySubT; - let TSFlags{40-35} = validSubTargets.Value; - // Addressing mode for load/store instructions. 
AddrModeType addrMode = NoAddrMode; let TSFlags{43-41} = addrMode.Value; diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td index 1c46ae77b6c7..b913727972e5 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -15,8 +15,6 @@ // Instruction Classes Definitions + //----------------------------------------------------------------------------// -let validSubTargets = HasV60SubT in -{ class CVI_VA_Resource<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> @@ -184,10 +182,7 @@ class CVI_HIST_Resource<dag outs, dag ins, string asmstr, InstrItinClass itin = CVI_HIST> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, OpcodeHexagon, Requires<[HasV60T, UseHVX]>; -} -let validSubTargets = HasV60SubT in -{ class CVI_VA_Resource1<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> @@ -205,6 +200,3 @@ class CVI_HIST_Resource1<dag outs, dag ins, string asmstr, InstrItinClass itin = CVI_HIST> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>, Requires<[HasV60T, UseHVX]>; -} - - diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index b265a883da5c..852bfb1b4f54 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -869,6 +869,9 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); unsigned KillFlag = getKillRegState(isKill); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget<HexagonSubtarget>(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, @@ -899,24 +902,36 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B : Hexagon::V6_vS32b_ai_128B; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32b_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai : Hexagon::PS_vstorerw_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? 
Hexagon::PS_vstorerwu_ai_128B : Hexagon::PS_vstorerw_ai_128B; BuildMI(MBB, I, DL, get(Opc)) @@ -935,6 +950,9 @@ void HexagonInstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget<HexagonSubtarget>(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, @@ -959,21 +977,33 @@ void HexagonInstrInfo::loadRegFromStackSlot( BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B : Hexagon::PS_vloadrw_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B : Hexagon::V6_vL32b_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32b_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai : Hexagon::PS_vloadrw_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) @@ -1110,8 +1140,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned Offset = Is128B ? 
VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_lo)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(2).getImm()); + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI1New->getOperand(1).setIsKill(false); BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi)) .add(MI.getOperand(1)) @@ -1940,7 +1971,7 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { case Hexagon::L4_return_fnew_pnt : case Hexagon::L4_return_tnew_pt : case Hexagon::L4_return_fnew_pt : - return true; + return true; } return false; } @@ -1967,12 +1998,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, if (RegA == RegB) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegA)) + if (TargetRegisterInfo::isPhysicalRegister(RegA)) for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) if (RegB == *SubRegs) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegB)) + if (TargetRegisterInfo::isPhysicalRegister(RegB)) for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) if (RegA == *SubRegs) return true; @@ -2139,7 +2170,7 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const { // This selection of jump instructions matches to that what - // AnalyzeBranch can parse, plus NVJ. + // analyzeBranch can parse, plus NVJ. if (isNewValueJump(MI)) // r9:2 return isInt<11>(offset); @@ -2666,6 +2697,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L2_loadrh_io: case Hexagon::L2_loadruh_io: case Hexagon::S2_storerh_io: + case Hexagon::S2_storerf_io: return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2876,6 +2908,11 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, /// \brief Can these instructions execute at the same time in a bundle. 
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { + if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) { + const MachineOperand &Op = Second.getOperand(0); + if (Op.isReg() && Op.isUse() && Op.getReg() == Hexagon::R29) + return true; + } if (DisableNVSchedule) return false; if (mayBeNewStore(Second)) { @@ -3000,13 +3037,9 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator MII = BII; MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); - if (!MII->isBundle()) { + if (!(*MII).isBundle()) { const MachineInstr &J = *MII; - if (!isV60VectorInstruction(J)) - return false; - else if (isVecUsableNextPacket(J, MI)) - return false; - return true; + return producesStall(J, MI); } for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { @@ -3034,12 +3067,14 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, } bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnew) || - (Opcode == Hexagon::J2_jumpfnew) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt); + return Opcode == Hexagon::J2_jumpt || + Opcode == Hexagon::J2_jumptpt || + Opcode == Hexagon::J2_jumpf || + Opcode == Hexagon::J2_jumpfpt || + Opcode == Hexagon::J2_jumptnew || + Opcode == Hexagon::J2_jumpfnew || + Opcode == Hexagon::J2_jumptnewpt || + Opcode == Hexagon::J2_jumpfnewpt; } bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { @@ -3341,9 +3376,30 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } +// Return the regular version of the .cur instruction. +int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_cur_pi: + return Hexagon::V6_vL32b_pi; + case Hexagon::V6_vL32b_cur_ai: + return Hexagon::V6_vL32b_ai; + //128B + case Hexagon::V6_vL32b_cur_pi_128B: + return Hexagon::V6_vL32b_pi_128B; + case Hexagon::V6_vL32b_cur_ai_128B: + return Hexagon::V6_vL32b_ai_128B; + } + return 0; +} + + // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +// // p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] // ^ ^ // / \ (not OK. it will cause new-value store to be @@ -3564,11 +3620,11 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { + const MachineFunction &MF = *MI.getParent()->getParent(); + const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); int NewOp = MI.getOpcode(); if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - const MachineFunction &MF = *MI.getParent()->getParent(); - const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); // All Hexagon architectures have prediction bits on dot-new branches, // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure // to pick the right opcode when converting back to dot-old. 
@@ -3596,6 +3652,21 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { NewOp = Hexagon::getNonNVStore(NewOp); assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } + + if (HST.hasV60TOps()) + return NewOp; + + // Subtargets prior to V60 didn't support 'taken' forms of predicated jumps. + switch (NewOp) { + case Hexagon::J2_jumpfpt: + return Hexagon::J2_jumpf; + case Hexagon::J2_jumptpt: + return Hexagon::J2_jumpt; + case Hexagon::J2_jumprfpt: + return Hexagon::J2_jumprf; + case Hexagon::J2_jumprtpt: + return Hexagon::J2_jumprt; + } return NewOp; } @@ -3947,18 +4018,6 @@ short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } -// Return first non-debug instruction in the basic block. -MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) - const { - for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { - MachineInstr &MI = *MII; - if (MI.isDebugValue()) - continue; - return &MI; - } - return nullptr; -} - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. However, an "empty" itinerary may @@ -4139,11 +4198,6 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } -unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; -} - // Calculate size of the basic block without debug instructions. unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index b268c7a28171..21b4f738f6e8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -399,6 +399,7 @@ public: const MachineInstr &GB) const; int getCondOpcode(int Opc, bool sense) const; int getDotCurOp(const MachineInstr &MI) const; + int getNonDotCurOp(const MachineInstr &MI) const; int getDotNewOp(const MachineInstr &MI) const; int getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const; @@ -424,7 +425,6 @@ public: unsigned getSize(const MachineInstr &MI) const; uint64_t getType(const MachineInstr &MI) const; unsigned getUnits(const MachineInstr &MI) const; - unsigned getValidSubTargets(const unsigned Opcode) const; /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. 
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 7189b5a52c42..072501d8260d 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -39,7 +39,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Populate the relocation type based on Hexagon target flags // set on an operand MCSymbolRefExpr::VariantKind RelocationType; - switch (MO.getTargetFlags()) { + switch (MO.getTargetFlags() & ~HexagonII::HMOTF_ConstExtended) { default: RelocationType = MCSymbolRefExpr::VK_None; break; diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index f87a1b8e424d..f80e0ef9e39f 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -14,8 +14,8 @@ def f64Imm : Operand<f64> { let ParserMatchClass = f64ImmOperand; } def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>; def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; } def s9_0Imm : Operand<i32> { let ParserMatchClass = s9_0ImmOperand; } -def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; } -def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; } +def s27_2ImmOperand : AsmOperandClass { let Name = "s27_2Imm"; let RenderMethod = "addSignedImmOperands"; } +def s27_2Imm : Operand<i32> { let ParserMatchClass = s27_2ImmOperand; } def r32_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 5a720e794562..2e8def572c4b 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -14,8 +14,11 @@ let PrintMethod = "printGlobalOperand" in { let isPseudo = 1 in { let isCodeGenOnly = 0 in -def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; -def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; +def A2_iconst : Pseudo<(outs IntRegs:$Rd32), + (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; + +def DUPLEX_Pseudo : InstHexagon<(outs), + (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; } let isExtendable = 1, opExtendable = 1, opExtentBits = 6, @@ -321,7 +324,7 @@ def LDriw_mod : LDInst<(outs ModRegs:$dst), // Vector load let Predicates = [HasV60T, UseHVX] in -let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +let mayLoad = 1, hasSideEffects = 0 in class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VM_LD, IType type = TypeCVI_VM_LD> @@ -329,7 +332,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in // Vector store let Predicates = [HasV60T, UseHVX] in -let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +let mayStore = 1, hasSideEffects = 0 in class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VM_ST, IType type = TypeCVI_VM_ST> @@ -415,7 +418,7 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { // Vector load/store pseudos -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +let isPseudo = 1, isCodeGenOnly = 1 in class STrivv_template<RegisterClass RC> : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>; @@ -429,7 +432,7 @@ def PS_vstorerwu_ai_128B: 
STrivv_template<VecDblRegs128B>, Requires<[HasV60T,UseHVXDbl]>; -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in +let isPseudo = 1, isCodeGenOnly = 1 in class LDrivv_template<RegisterClass RC> : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 3a789a5f7e0b..bf1dce67bd0a 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -356,7 +356,7 @@ void HexagonPacketizerList::cleanUpDotCur() { MachineInstr *MI = nullptr; for (auto BI : CurrentPacketMIs) { DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); - if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + if (HII->isDotCurInst(*BI)) { MI = BI; continue; } @@ -369,7 +369,7 @@ void HexagonPacketizerList::cleanUpDotCur() { if (!MI) return; // We did not find a use of the CUR, so de-cur it. - MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + MI->setDesc(HII->get(HII->getNonDotCurOp(*MI))); DEBUG(dbgs() << "Demoted CUR "; MI->dump();); } @@ -1579,14 +1579,13 @@ MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI.getIterator(); MachineBasicBlock *MBB = MI.getParent(); - if (MI.isImplicitDef()) { - unsigned R = MI.getOperand(0).getReg(); - if (Hexagon::IntRegsRegClass.contains(R)) { - MCSuperRegIterator S(R, HRI, false); - MI.addOperand(MachineOperand::CreateReg(*S, true, true)); - } + + if (CurrentPacketMIs.size() == 0) + PacketStalls = false; + PacketStalls |= producesStall(MI); + + if (MI.isImplicitDef()) return MII; - } assert(ResourceTracker->canReserveResources(MI)); bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); @@ -1677,6 +1676,11 @@ static bool isDependent(const MachineInstr &ProdMI, // V60 forward scheduling. bool HexagonPacketizerList::producesStall(const MachineInstr &I) { + // If the packet already stalls, then ignore the stall from a subsequent + // instruction in the same packet. + if (PacketStalls) + return false; + // Check whether the previous packet is in a different loop. If this is the // case, there is little point in trying to avoid a stall because that would // favor the rare case (loop entry) over the common case (loop iteration). @@ -1699,6 +1703,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I)) return true; } + return false; } @@ -1721,6 +1726,16 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { } } + // Check if the latency is greater than one between this instruction and any + // instruction in the previous packet. + SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)]; + for (auto J : OldPacketMIs) { + SUnit *SUJ = MIToSUnit[J]; + for (auto &Pred : SUI->Preds) + if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) + return true; + } + return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 3f28dc5b79ce..adb92b6dc855 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -34,6 +34,10 @@ class HexagonPacketizerList : public VLIWPacketizerList { // Track MIs with ignored dependence. std::vector<MachineInstr*> IgnoreDepMIs; + // Set to true if the packet contains an instruction that stalls with an + // instruction from the previous packet. 
+ bool PacketStalls = false; + protected: /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index c3b6eb19828b..904403543e18 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -184,7 +184,11 @@ public: { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 }, - { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 } + { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -291,6 +295,11 @@ public: case fixup_Hexagon_32_PCREL: case fixup_Hexagon_6_PCREL_X: case fixup_Hexagon_23_REG: + case fixup_Hexagon_27_REG: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: // These relocations should always have a relocation recorded IsResolved = false; return; @@ -347,6 +356,8 @@ public: case fixup_Hexagon_B9_PCREL_X: case fixup_Hexagon_B7_PCREL: case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: return 4; } } @@ -374,6 +385,8 @@ public: break; case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: Value >>= 6; break; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 53d8b04c50a5..adb546dc2140 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -128,10 +128,6 @@ namespace HexagonII { ExtentAlignPos = 33, ExtentAlignMask = 0x3, - // Valid subtargets - validSubTargetPos = 35, - validSubTargetMask = 0x3f, - // Addressing mode for load/store instructions. 
AddrModePos = 41, AddrModeMask = 0x7, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 944e235e72f2..b975e3131094 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -284,6 +284,16 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_HEX_TPREL_11_X; case fixup_Hexagon_23_REG: return ELF::R_HEX_23_REG; + case fixup_Hexagon_27_REG: + return ELF::R_HEX_27_REG; + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + return ELF::R_HEX_GD_PLT_B22_PCREL_X; + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + return ELF::R_HEX_GD_PLT_B32_PCREL_X; + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + return ELF::R_HEX_LD_PLT_B22_PCREL_X; + case fixup_Hexagon_LD_PLT_B32_PCREL_X: + return ELF::R_HEX_LD_PLT_B32_PCREL_X; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h index 4c97ebbdd346..347327669ad9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -111,6 +111,11 @@ enum Fixups { fixup_Hexagon_TPREL_16_X, fixup_Hexagon_TPREL_11_X, fixup_Hexagon_23_REG, + fixup_Hexagon_27_REG, + fixup_Hexagon_GD_PLT_B22_PCREL_X, + fixup_Hexagon_GD_PLT_B32_PCREL_X, + fixup_Hexagon_LD_PLT_B22_PCREL_X, + fixup_Hexagon_LD_PLT_B32_PCREL_X, LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 33d73f1819cd..3bb658b84451 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -113,7 +113,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { // The instruction table models the USR.OVF flag, which can be // implicitly modified more than once, but cannot be modified in the // same packet with an instruction that modifies is explicitly. Deal - // with such situ- ations individually. + // with such situations individually. SoftDefs.insert(R); else if (isPredicateRegister(R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) @@ -159,12 +159,6 @@ void HexagonMCChecker::init(MCInst const &MCI) { isPredicateRegister(*SRI)) // Some insns produce predicates too late to be used in the same packet. LatePreds.insert(*SRI); - else if (i == 0 && HexagonMCInstrInfo::isCVINew(MCII, MCI) && - MCID.mayLoad()) - // Current loads should be used in the same packet. - // TODO: relies on the impossibility of a current and a temporary loads - // in the same packet. 
- CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) // Temporary loads should be used in the same packet, but don't commit @@ -202,9 +196,8 @@ void HexagonMCChecker::init(MCInst const &MCI) { if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); - for (MCRegAliasIterator SRI(R2, &RI, - !MCSubRegIterator(R2, &RI).isValid()); - SRI.isValid(); ++SRI) + bool HasSubRegs = MCSubRegIterator(R2, &RI).isValid(); + for (MCRegAliasIterator SRI(R2, &RI, !HasSubRegs); SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) NewDefs[*SRI].push_back(NewSense::Def( PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), @@ -252,6 +245,8 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkNV = checkNewValues(); bool chkR = checkRegisters(); bool chkRRO = checkRegistersReadOnly(); + bool chkELB = checkEndloopBranches(); + checkRegisterCurDefs(); bool chkS = checkSolo(); bool chkSh = true; if (FullCheck) @@ -259,11 +254,106 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkSl = true; if (FullCheck) chkSl = checkSlots(); - bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl; + bool chkAXOK = checkAXOK(); + bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkELB && chkS && + chkSh && chkSl && chkAXOK; return chk; } +bool HexagonMCChecker::checkEndloopBranches() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall()) { + auto Inner = HexagonMCInstrInfo::isInnerLoop(MCB); + if (Inner || HexagonMCInstrInfo::isOuterLoop(MCB)) { + reportError(I.getLoc(), + llvm::Twine("packet marked with `:endloop") + + (Inner ? 
"0" : "1") + "' " + + "cannot contain instructions that modify register " + + "`" + llvm::Twine(RI.getName(Hexagon::PC)) + "'"); + return false; + } + } + } + return true; +} + +namespace { +bool isDuplexAGroup(unsigned Opcode) { + switch (Opcode) { + case Hexagon::SA1_addi: + case Hexagon::SA1_addrx: + case Hexagon::SA1_addsp: + case Hexagon::SA1_and1: + case Hexagon::SA1_clrf: + case Hexagon::SA1_clrfnew: + case Hexagon::SA1_clrt: + case Hexagon::SA1_clrtnew: + case Hexagon::SA1_cmpeqi: + case Hexagon::SA1_combine0i: + case Hexagon::SA1_combine1i: + case Hexagon::SA1_combine2i: + case Hexagon::SA1_combine3i: + case Hexagon::SA1_combinerz: + case Hexagon::SA1_combinezr: + case Hexagon::SA1_dec: + case Hexagon::SA1_inc: + case Hexagon::SA1_seti: + case Hexagon::SA1_setin1: + case Hexagon::SA1_sxtb: + case Hexagon::SA1_sxth: + case Hexagon::SA1_tfr: + case Hexagon::SA1_zxtb: + case Hexagon::SA1_zxth: + return true; + break; + default: + return false; + } +} + +bool isNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { + unsigned Result = 0; + unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); + if (Type == HexagonII::TypeDUPLEX) { + unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); + unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); + Result += !isDuplexAGroup(subInst0Opcode); + Result += !isDuplexAGroup(subInst1Opcode); + } else + Result += + Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op && + Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op && + Type != HexagonII::TypeS_3op && + (Type != HexagonII::TypeALU64 || HexagonMCInstrInfo::isFloat(MCII, ID)); + return Result != 0; +} +} // namespace + +bool HexagonMCChecker::checkAXOK() { + MCInst const *HasSoloAXInst = nullptr; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isSoloAX(MCII, I)) { + HasSoloAXInst = &I; + } + } + if (!HasSoloAXInst) + return true; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (&I != HasSoloAXInst && isNeitherAnorX(MCII, I)) { + reportError( + HasSoloAXInst->getLoc(), + llvm::Twine("Instruction can only be in a packet with ALU or " + "non-FPU XTYPE instructions")); + reportError(I.getLoc(), + llvm::Twine("Not an ALU or non-FPU XTYPE instruction")); + return false; + } + } + return true; +} + bool HexagonMCChecker::checkSlots() { unsigned slotsUsed = 0; for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { @@ -309,16 +399,6 @@ bool HexagonMCChecker::checkBranches() { } } - if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? - if (HexagonMCInstrInfo::isInnerLoop(MCB) || - HexagonMCInstrInfo::isOuterLoop(MCB)) { - // Error out if there's any branch in a loop-end packet. - Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? 
'0' : '1'); - reportError("packet marked with `:endloop" + N + "' " + - "cannot contain instructions that modify register " + "`" + - llvm::Twine(RI.getName(Hexagon::PC)) + "'"); - return false; - } if (Branches > 1) if (!hasConditional || Conditional > Unconditional) { // Error out if more than one unconditional branch or @@ -396,6 +476,31 @@ bool HexagonMCChecker::checkRegistersReadOnly() { return true; } +bool HexagonMCChecker::registerUsed(unsigned Register) { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) + for (unsigned j = HexagonMCInstrInfo::getDesc(MCII, I).getNumDefs(), + n = I.getNumOperands(); + j < n; ++j) { + MCOperand const &Operand = I.getOperand(j); + if (Operand.isReg() && Operand.getReg() == Register) + return true; + } + return false; +} + +void HexagonMCChecker::checkRegisterCurDefs() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isCVINew(MCII, I) && + HexagonMCInstrInfo::getDesc(MCII, I).mayLoad()) { + unsigned Register = I.getOperand(0).getReg(); + if (!registerUsed(Register)) + reportWarning("Register `" + llvm::Twine(RI.getName(Register)) + + "' used with `.cur' " + "but not used in the same packet"); + } + } +} + // Check for legal register uses and definitions. bool HexagonMCChecker::checkRegisters() { // Check for proper register definitions. @@ -447,8 +552,7 @@ bool HexagonMCChecker::checkRegisters() { if (PM.count(P) && PM.size() > 2) { // Error out on conditional changes based on the same predicate // multiple times - // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... - // }"). + // (e.g., "if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =..."). reportErrorRegisters(R); return false; } @@ -456,19 +560,6 @@ bool HexagonMCChecker::checkRegisters() { } } - // Check for use of current definitions. - for (const auto &I : CurDefs) { - unsigned R = I; - - if (!Uses.count(R)) { - // Warn on an unused current definition. - reportWarning("register `" + llvm::Twine(RI.getName(R)) + - "' used with `.cur' " - "but not used in the same packet"); - return true; - } - } - // Check for use of temporary definitions. for (const auto &I : TmpDefs) { unsigned R = I; @@ -499,12 +590,11 @@ bool HexagonMCChecker::checkRegisters() { // Check for legal use of solo insns. bool HexagonMCChecker::checkSolo() { if (HexagonMCInstrInfo::bundleSize(MCB) > 1) - for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { - SMLoc Loc = I.getInst()->getLoc(); - reportError(Loc, "Instruction is marked `isSolo' and " - "cannot have other instructions in " - "the same packet"); + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, I)) { + reportError(I.getLoc(), "Instruction is marked `isSolo' and " + "cannot have other instructions in " + "the same packet"); return false; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index d0238691cdc0..027f78b4899c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -78,10 +78,6 @@ class HexagonMCChecker { typedef std::set<unsigned>::iterator SoftDefsIterator; std::set<unsigned> SoftDefs; - /// Set of current definitions committed to the register file. 
- typedef std::set<unsigned>::iterator CurDefsIterator; - std::set<unsigned> CurDefs; - /// Set of temporary definitions not committed to the register file. typedef std::set<unsigned>::iterator TmpDefsIterator; std::set<unsigned> TmpDefs; @@ -110,15 +106,20 @@ class HexagonMCChecker { void init(MCInst const &); void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); + bool registerUsed(unsigned Register); + // Checks performed. bool checkBranches(); bool checkPredicates(); bool checkNewValues(); bool checkRegisters(); bool checkRegistersReadOnly(); + bool checkEndloopBranches(); + void checkRegisterCurDefs(); bool checkSolo(); bool checkShuffle(); bool checkSlots(); + bool checkAXOK(); static void compoundRegisterMap(unsigned &); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index c0956520de73..dfb5f4cc8260 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -199,6 +199,11 @@ Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits( return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; case MCSymbolRefExpr::VK_Hexagon_PCREL: return Hexagon::fixup_Hexagon_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_None: { auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) { @@ -318,6 +323,8 @@ namespace { case fixup_Hexagon_PLT_B22_PCREL: case fixup_Hexagon_GD_PLT_B22_PCREL: case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: case fixup_Hexagon_6_PCREL_X: return true; default: @@ -414,10 +421,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case 22: switch (kind) { case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_None: FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X @@ -467,8 +476,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, } else switch (kind) { case MCSymbolRefExpr::VK_None: { - if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr())) - FixupKind = Hexagon::fixup_Hexagon_23_REG; + if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) + FixupKind = Hexagon::fixup_Hexagon_27_REG; else if (MCID.mayStore() || MCID.mayLoad()) { for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; @@ -593,6 +602,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case MCSymbolRefExpr::VK_Hexagon_LD_GOT: FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; break; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X; + break; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X; + break; case MCSymbolRefExpr::VK_None: FixupKind = Hexagon::fixup_Hexagon_11_X; break; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp index 14300edc7e1b..9fbe299d7d52 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -94,9 +94,9 @@ void HexagonMCExpr::setMustNotExtend(bool Val) { } bool HexagonMCExpr::mustNotExtend() const { return MustNotExtend; } -bool HexagonMCExpr::s23_2_reloc() const { return S23_2_reloc; } -void HexagonMCExpr::setS23_2_reloc(bool Val) { - S23_2_reloc = Val; +bool HexagonMCExpr::s27_2_reloc() const { return S27_2_reloc; } +void HexagonMCExpr::setS27_2_reloc(bool Val) { + S27_2_reloc = Val; } bool HexagonMCExpr::classof(MCExpr const *E) { @@ -104,7 +104,7 @@ bool HexagonMCExpr::classof(MCExpr const *E) { } HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr) - : Expr(Expr), MustNotExtend(false), MustExtend(false), S23_2_reloc(false), + : Expr(Expr), MustNotExtend(false), MustExtend(false), S27_2_reloc(false), SignMismatch(false) {} void HexagonMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h index bca40cfaf6f4..acfd996ccf82 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -29,8 +29,8 @@ public: bool mustExtend() const; void setMustNotExtend(bool Val = true); bool mustNotExtend() const; - void setS23_2_reloc(bool Val = true); - bool s23_2_reloc() const; + void setS27_2_reloc(bool Val = true); + bool s27_2_reloc() const; void setSignMismatch(bool Val = true); bool signMismatch() const; @@ -39,7 +39,7 @@ private: MCExpr const *Expr; bool MustNotExtend; bool MustExtend; - bool S23_2_reloc; + bool S27_2_reloc; bool SignMismatch; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 30a811a36406..5fe638a9996b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -22,6 +22,49 @@ #include "llvm/MC/MCSubtargetInfo.h" namespace llvm { + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst) + : MCII(MCII), BundleCurrent(Inst.begin() + + HexagonMCInstrInfo::bundleInstructionsOffset), + BundleEnd(Inst.end()), DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst, std::nullptr_t) + : MCII(MCII), 
BundleCurrent(Inst.end()), BundleEnd(Inst.end()), + DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator &Hexagon::PacketIterator::operator++() { + if (DuplexCurrent != DuplexEnd) { + ++DuplexCurrent; + if (DuplexCurrent == DuplexEnd) { + DuplexCurrent = BundleEnd; + DuplexEnd = BundleEnd; + } + return *this; + } + ++BundleCurrent; + if (BundleCurrent != BundleEnd) { + MCInst const &Inst = *BundleCurrent->getInst(); + if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) { + DuplexCurrent = Inst.begin(); + DuplexEnd = Inst.end(); + } + } + return *this; +} + +MCInst const &Hexagon::PacketIterator::operator*() const { + if (DuplexCurrent != DuplexEnd) + return *DuplexCurrent->getInst(); + return *BundleCurrent->getInst(); +} + +bool Hexagon::PacketIterator::operator==(PacketIterator const &Other) const { + return BundleCurrent == Other.BundleCurrent && BundleEnd == Other.BundleEnd && + DuplexCurrent == Other.DuplexCurrent && DuplexEnd == Other.DuplexEnd; +} + void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, MCContext &Context) { MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); @@ -41,6 +84,14 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, MCB.addOperand(MCOperand::createInst(XMCI)); } +iterator_range<Hexagon::PacketIterator> +HexagonMCInstrInfo::bundleInstructions(MCInstrInfo const &MCII, + MCInst const &MCI) { + assert(isBundle(MCI)); + return make_range(Hexagon::PacketIterator(MCII, MCI), + Hexagon::PacketIterator(MCII, MCI, nullptr)); +} + iterator_range<MCInst::const_iterator> HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); @@ -292,7 +343,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, } StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, - MCInst const &MCI) { + MCInst const &MCI) { return MCII.getName(MCI.getOpcode()); } @@ -339,25 +390,6 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); } -int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, - MCInst const &MCI) { - const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - - HexagonII::SubTarget Target = static_cast<HexagonII::SubTarget>( - (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); - - switch (Target) { - default: - return Hexagon::ArchV4; - case HexagonII::HasV5SubT: - return Hexagon::ArchV5; - case HexagonII::HasV55SubT: - return Hexagon::ArchV55; - case HexagonII::HasV60SubT: - return Hexagon::ArchV60; - } -} - /// Return the slots this instruction can execute out of unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, @@ -397,9 +429,8 @@ bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; - for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto MI = I.getInst(); - if (HexagonMCInstrInfo::isDuplex(MCII, *MI)) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCI)) { + if (HexagonMCInstrInfo::isDuplex(MCII, I)) return true; } @@ -410,13 +441,12 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { return extenderForIndex(MCB, Index) != nullptr; } -bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { +bool HexagonMCInstrInfo::hasImmExt( MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto 
MI = I.getInst(); - if (isImmext(*MI)) + if (isImmext(*I.getInst())) return true; } @@ -737,16 +767,16 @@ bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) { HexagonMCExpr const &HExpr = cast<HexagonMCExpr>(Expr); return HExpr.mustNotExtend(); } -void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) { +void HexagonMCInstrInfo::setS27_2_reloc(MCExpr const &Expr, bool Val) { HexagonMCExpr &HExpr = const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr)); - HExpr.setS23_2_reloc(Val); + HExpr.setS27_2_reloc(Val); } -bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) { +bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) { HexagonMCExpr const *HExpr = llvm::dyn_cast<HexagonMCExpr>(&Expr); if (!HExpr) return false; - return HExpr->s23_2_reloc(); + return HExpr->s27_2_reloc(); } void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) { @@ -818,4 +848,4 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, return 0x1; return 0; } -} +} // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 4d2df4d0eb69..ca44c3a11ba7 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -31,6 +31,25 @@ public: DuplexCandidate(unsigned i, unsigned j, unsigned iClass) : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} }; +namespace Hexagon { +class PacketIterator { + MCInstrInfo const &MCII; + MCInst::const_iterator BundleCurrent; + MCInst::const_iterator BundleEnd; + MCInst::const_iterator DuplexCurrent; + MCInst::const_iterator DuplexEnd; + +public: + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst); + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst, std::nullptr_t); + PacketIterator &operator++(); + MCInst const &operator*() const; + bool operator==(PacketIterator const &Other) const; + bool operator!=(PacketIterator const &Other) const { + return !(*this == Other); + } +}; +} // namespace Hexagon namespace HexagonMCInstrInfo { size_t const innerLoopOffset = 0; int64_t const innerLoopMask = 1 << innerLoopOffset; @@ -54,6 +73,8 @@ void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, MCInst const &MCI); // Returns a iterator range of instructions in this bundle +iterator_range<Hexagon::PacketIterator> +bundleInstructions(MCInstrInfo const &MCII, MCInst const &MCI); iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI); // Returns the number of instructions in the bundle @@ -131,7 +152,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI); MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII, MCInst const &MCI); -int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); // Return the Hexagon ISA class for the insn. 
unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); @@ -263,14 +283,14 @@ bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); // Replace the instructions inside MCB, represented by Candidate void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate); -bool s23_2_reloc(MCExpr const &Expr); +bool s27_2_reloc(MCExpr const &Expr); // Marks a bundle as endloop0 void setInnerLoop(MCInst &MCI); void setMemReorderDisabled(MCInst &MCI); void setMemStoreReorderEnabled(MCInst &MCI); void setMustExtend(MCExpr const &Expr, bool Val = true); void setMustNotExtend(MCExpr const &Expr, bool Val = true); -void setS23_2_reloc(MCExpr const &Expr, bool Val = true); +void setS27_2_reloc(MCExpr const &Expr, bool Val = true); // Marks a bundle as endloop1 void setOuterLoop(MCInst &MCI); @@ -283,7 +303,7 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer, // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCI); -} -} +} // namespace HexagonMCInstrInfo +} // namespace llvm #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index eb303464555d..a5afa1daeb9e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -205,64 +205,12 @@ static struct { } jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; #define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0])) -namespace { -bool isDuplexAGroup(unsigned Opcode) { - switch (Opcode) { - case Hexagon::SA1_addi: - case Hexagon::SA1_addrx: - case Hexagon::SA1_addsp: - case Hexagon::SA1_and1: - case Hexagon::SA1_clrf: - case Hexagon::SA1_clrfnew: - case Hexagon::SA1_clrt: - case Hexagon::SA1_clrtnew: - case Hexagon::SA1_cmpeqi: - case Hexagon::SA1_combine0i: - case Hexagon::SA1_combine1i: - case Hexagon::SA1_combine2i: - case Hexagon::SA1_combine3i: - case Hexagon::SA1_combinerz: - case Hexagon::SA1_combinezr: - case Hexagon::SA1_dec: - case Hexagon::SA1_inc: - case Hexagon::SA1_seti: - case Hexagon::SA1_setin1: - case Hexagon::SA1_sxtb: - case Hexagon::SA1_sxth: - case Hexagon::SA1_tfr: - case Hexagon::SA1_zxtb: - case Hexagon::SA1_zxth: - return true; - break; - default: - return false; - } -} - -unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { - unsigned Result = 0; - unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); - if (Type == HexagonII::TypeDUPLEX) { - unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); - unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); - Result += !isDuplexAGroup(subInst0Opcode); - Result += !isDuplexAGroup(subInst1Opcode); - } else - Result += - Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op && - Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op && - Type != HexagonII::TypeS_3op && Type != HexagonII::TypeALU64 && - (Type != HexagonII::TypeM || HexagonMCInstrInfo::isFloat(MCII, ID)); - return Result; -} -} // namespace - /// Check that the packet is legal and enforce relative insn order. bool HexagonShuffler::check() { // Descriptive slot masks. 
const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, slotThree = 0x8, // slotFirstJump = 0x8, - slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; + slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; // Highest slots for branches and stores used to keep their original order. // unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; @@ -271,18 +219,12 @@ bool HexagonShuffler::check() { // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; - // Number of HVX loads, HVX stores. - unsigned CVIloads = 0, CVIstores = 0; - // Number of duplex insns, solo insns. - unsigned duplex = 0, solo = 0; - // Number of insns restricting other insns in the packet to A and X types, - // which is neither A or X types. - unsigned onlyAX = 0, neitherAnorX = 0; + // Number of duplex insns + unsigned duplex = 0; // Number of insns restricting other insns in slot #1 to A type. unsigned onlyAin1 = 0; // Number of insns restricting any insn in slot #1, except A2_nop. unsigned onlyNo1 = 0; - unsigned xtypeFloat = 0; unsigned pSlot3Cnt = 0; unsigned nvstores = 0; unsigned memops = 0; @@ -295,13 +237,8 @@ bool HexagonShuffler::check() { for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const &ID = ISJ->getDesc(); - if (HexagonMCInstrInfo::isSolo(MCII, ID)) - solo++; - else if (HexagonMCInstrInfo::isSoloAX(MCII, ID)) - onlyAX++; - else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) - onlyAin1++; - neitherAnorX += countNeitherAnorX(MCII, ID); + if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) + ++onlyAin1; if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) { ++pSlot3Cnt; slot3ISJ = ISJ; @@ -314,8 +251,6 @@ bool HexagonShuffler::check() { case HexagonII::TypeS_2op: case HexagonII::TypeS_3op: case HexagonII::TypeALU64: - if (HexagonMCInstrInfo::isFloat(MCII, ID)) - ++xtypeFloat; break; case HexagonII::TypeJ: ++jumps; @@ -325,7 +260,6 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: - ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; @@ -341,7 +275,6 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: - ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; @@ -403,15 +336,22 @@ bool HexagonShuffler::check() { ++jumps; foundBranches.push_back(ISJ); } + if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } + if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } break; } } } // Check if the packet is legal. 
- if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || - (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || - (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { + if ((load0 > 1 || store0 > 1) || + (duplex > 1 || (duplex && memory))) { reportError(llvm::Twine("invalid instruction packet")); return false; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index bd31c7be4c0a..10a959008f44 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -105,8 +105,8 @@ class HexagonInstr { public: HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T, MCInstrInfo const &MCII, MCInst const *id, - MCInst const *Extender, unsigned s, bool x = false) - : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {} + MCInst const *Extender, unsigned s) + : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}; MCInst const &getDesc() const { return *ID; }; diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index a71b161b24cc..5a394fe02f16 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -490,15 +490,14 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, // remove the use-soft-float attribute // static void removeUseSoftFloat(Function &F) { - AttributeList A; + AttrBuilder B; DEBUG(errs() << "removing -use-soft-float\n"); - A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex, - "use-soft-float", "false"); - F.removeAttributes(AttributeList::FunctionIndex, A); + B.addAttribute("use-soft-float", "false"); + F.removeAttributes(AttributeList::FunctionIndex, B); if (F.hasFnAttribute("use-soft-float")) { DEBUG(errs() << "still has -use-soft-float\n"); } - F.addAttributes(AttributeList::FunctionIndex, A); + F.addAttributes(AttributeList::FunctionIndex, B); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 5645fdc24850..32661099b79d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -1017,6 +1017,14 @@ namespace llvm { SDValue combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + + bool supportsModuloShift(ISD::NodeType Inst, + EVT ReturnType) const override { + assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) && + "Expect a shift instruction"); + assert(isOperationLegal(Inst, ReturnType)); + return ReturnType.isVector(); + } }; namespace PPC { diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index d286158f407d..f56b238f91e6 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -530,9 +530,10 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) { if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0))) OpTy = CI->getOperand(0)->getType(); else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0))) - if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0))) - if (isa<CmpInst>(LogicI->getOperand(1))) - OpTy = CI0->getOperand(0)->getType(); + if (LogicI->getNumOperands() == 2) + if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0))) + if (isa<CmpInst>(LogicI->getOperand(1))) + OpTy = CI0->getOperand(0)->getType(); if (OpTy != nullptr) { if (VF == 1) { diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 
d2f650cf8f47..784c3a6557ff 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -170,6 +170,8 @@ def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", [FeatureSSE2]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; +def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", + "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", @@ -691,6 +693,7 @@ def : Proc<"bdver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, + FeatureLWP, FeatureSlowSHLD, FeatureLAHFSAHF ]>; @@ -713,6 +716,7 @@ def : Proc<"bdver2", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF @@ -737,6 +741,7 @@ def : Proc<"bdver3", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, @@ -763,6 +768,7 @@ def : Proc<"bdver4", [ FeatureBMI, FeatureBMI2, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index fd11b671dd9b..ebd179e786da 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3181,6 +3181,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); + const CallInst *CI = + CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; + const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; + + // Functions with no_caller_saved_registers that need special handling. + if ((CI && CI->hasFnAttr("no_caller_saved_registers")) || + (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6092fd2bfd69..83542aaa013b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2180,6 +2180,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + // In some cases we need to disable registers from the default CSR list. + // For example, when they are used for argument passing. + bool ShouldDisableCalleeSavedRegister = + CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers"); + if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); @@ -2201,7 +2207,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VA.isRegLoc() && "Can only return in registers!"); // Add the register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); SDValue ValToCopy = OutVals[OutsIndex]; @@ -2280,7 +2286,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, "Expecting two registers after Pass64BitArgInRegs"); // Add the second register to the CalleeSaveDisableRegs list. 
- if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); @@ -2340,7 +2346,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); // Add the returned register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } @@ -2540,7 +2546,7 @@ SDValue X86TargetLowering::LowerCallResult( // In some calling conventions we need to remove the used registers // from the register mask. - if (RegMask && CallConv == CallingConv::X86_RegCall) { + if (RegMask) { for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); @@ -3237,7 +3243,8 @@ SDValue X86TargetLowering::LowerFormalArguments( } } - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || + Fn->hasFnAttribute("no_caller_saved_registers")) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) MF.getRegInfo().disableCalleeSavedRegister(Pair.first); @@ -3329,6 +3336,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + const CallInst *CI = + CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr; + const Function *Fn = CI ? CI->getCalledFunction() : nullptr; + bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); @@ -3741,7 +3753,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); + // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we + // set X86_INTR calling convention because it has the same CSR mask + // (same preserved registers). + const uint32_t *Mask = RegInfo->getCallPreservedMask( + MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based @@ -3764,7 +3780,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // In some calling conventions we need to remove the used physical registers // from the reg mask. - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || HasNCSR) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. 
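Editor's note: the register-mask edits above (LowerCallResult and LowerCall) all reduce to the same bit manipulation. The sketch below isolates it, assuming the mask layout used in the hunk — an array of 32-bit words with register number Reg at bit (Reg % 32) of word (Reg / 32); in the real code the loop also walks sub-registers via MCSubRegIterator.

    #include <cstdint>

    // Clearing the bit marks the register as clobbered by (not preserved
    // across) the call.
    static void clearRegFromMask(uint32_t *RegMask, unsigned Reg) {
      RegMask[Reg / 32] &= ~(1u << (Reg % 32));
    }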
@@ -19044,8 +19060,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, if (Op.getOpcode() == X86ISD::FSETCCM || Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); - if (Op.getOpcode() == X86ISD::VFPCLASS || - Op.getOpcode() == X86ISD::VFPCLASSS) + if (Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::OR, dl, VT, Op, IMask); if (PreservedSrc.isUndef()) @@ -20284,16 +20299,17 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); + const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + switch (IntNo) { + case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_seh_ehguard) + case llvm::Intrinsic::x86_seh_ehguard: return MarkEHGuard(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_flags_read_u32 || - IntNo == llvm::Intrinsic::x86_flags_read_u64 || - IntNo == llvm::Intrinsic::x86_flags_write_u32 || - IntNo == llvm::Intrinsic::x86_flags_write_u64) { + case llvm::Intrinsic::x86_flags_read_u32: + case llvm::Intrinsic::x86_flags_read_u64: + case llvm::Intrinsic::x86_flags_write_u32: + case llvm::Intrinsic::x86_flags_write_u64: { // We need a frame pointer because this will get lowered to a PUSH/POP // sequence. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -20302,6 +20318,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // during ExpandISelPseudos in EmitInstrWithCustomInserter. return SDValue(); } + case Intrinsic::x86_lwpins32: + case Intrinsic::x86_lwpins64: { + SDLoc dl(Op); + SDValue Chain = Op->getOperand(0); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue LwpIns = + DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2), + Op->getOperand(3), Op->getOperand(4)); + SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG); + SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + LwpIns.getValue(1)); + } + } return SDValue(); } @@ -24477,6 +24507,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; + case X86ISD::LWPINS: return "X86ISD::LWPINS"; } return nullptr; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 46dc587c637d..18106c2eb394 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -559,6 +559,9 @@ namespace llvm { // Conversions between float and half-float. CVTPS2PH, CVTPH2PS, + // LWP insert record. + LWPINS, + // Compare and swap. 
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 26444dd1f619..888daa275265 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -821,6 +821,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, + // LWP foldable instructions + { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, + { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, + { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, + { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, + // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index ce0876498677..cdf7ce19cdc8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -283,6 +283,11 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86lwpins : SDNode<"X86ISD::LWPINS", + SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -836,6 +841,7 @@ def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasTBM : Predicate<"Subtarget->hasTBM()">; +def HasLWP : Predicate<"Subtarget->hasLWP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -2444,6 +2450,59 @@ defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>; } // HasTBM, EFLAGS //===----------------------------------------------------------------------===// +// Lightweight Profiling Instructions + +let Predicates = [HasLWP] in { + +def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR32:$src)], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; +def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst", + [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; + +def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR64:$src)], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; +def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", + [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; + +multiclass lwpins_intr<RegisterClass RC> { + def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>, + XOP_4V, XOPA; + let mayLoad = 1 in + def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>, + XOP_4V, XOPA; +} + +let Defs = [EFLAGS] in { + defm LWPINS32 : lwpins_intr<GR32>; + defm LWPINS64 : lwpins_intr<GR64>, VEX_W; +} // EFLAGS + +multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> { + def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, 
$src0|$src0, $src1, $cntl}", + [(Int RC:$src0, GR32:$src1, imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; + let mayLoad = 1 in + def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; +} + +defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>; +defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W; + +} // HasLWP + +//===----------------------------------------------------------------------===// // MONITORX/MWAITX Instructions // let SchedRW = [ WriteSystem ] in { diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1f16f3c9a14d..cf2ceef8013a 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -276,7 +276,14 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); - switch (MF->getFunction()->getCallingConv()) { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + + // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling + // convention because it has the CSR list. + if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) + CC = CallingConv::X86_INTR; + + switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 7f7efd7cad3f..4eae6ca7abe3 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -497,6 +497,7 @@ def IIC_IN_RI : InstrItinClass; def IIC_OUT_RR : InstrItinClass; def IIC_OUT_IR : InstrItinClass; def IIC_INS : InstrItinClass; +def IIC_LWP : InstrItinClass; def IIC_MOV_REG_DR : InstrItinClass; def IIC_MOV_DR_REG : InstrItinClass; def IIC_MOV_REG_CR : InstrItinClass; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 82ff436f7ebf..9ab751e2b002 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -265,6 +265,7 @@ void X86Subtarget::initializeEnvironment() { HasFMA4 = false; HasXOP = false; HasTBM = false; + HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8568cf04e7d2..de1514243aeb 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -124,6 +124,9 @@ protected: /// Target has TBM instructions. bool HasTBM; + /// Target has LWP instructions + bool HasLWP; + /// True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -447,6 +450,7 @@ public: bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } + bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index bc14630584e5..500b26b3be17 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -412,7 +412,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { // Can't use musttail due to prototype mismatch, but we can use tail. Call->setTailCall(true); // Set inreg so we pass it in EAX. 
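Editor's note: the X86RegisterInfo hunk above reuses the X86_INTR callee-saved list for functions marked no_caller_saved_registers. A hedged sketch of that convention override, with an invented helper name, is:

    #include "llvm/IR/CallingConv.h"
    #include "llvm/IR/Function.h"

    // Pick the calling convention whose save list should apply: functions
    // carrying the attribute are treated as if they used X86_INTR, which
    // already preserves the same register set.
    static llvm::CallingConv::ID effectiveCCForSaveList(const llvm::Function &F) {
      if (F.hasFnAttribute("no_caller_saved_registers"))
        return llvm::CallingConv::X86_INTR;
      return F.getCallingConv();
    }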
- Call->addAttribute(1, Attribute::InReg); + Call->addParamAttr(0, Attribute::InReg); Builder.CreateRet(Call); return Trampoline; } diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index ab648f884c5b..12eb16789825 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -216,8 +216,8 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Function *NewF = Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, F.getName() + Suffix, M); - NewF->addAttribute(1, Attribute::NonNull); - NewF->addAttribute(1, Attribute::NoAlias); + NewF->addParamAttr(0, Attribute::NonNull); + NewF->addParamAttr(0, Attribute::NoAlias); ValueToValueMapTy VMap; // Replace all args with undefs. The buildCoroutineFrame algorithm already @@ -245,9 +245,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, // Remove old return attributes. NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType()))); + AttributeFuncs::typeIncompatible(NewF->getReturnType())); // Make AllocaSpillBlock the new entry block. auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]); diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 25db0eff8848..6408cad08d55 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -839,12 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); - F->removeAttribute(ArgNo + 1, Attribute::StructRet); - F->addAttribute(ArgNo + 1, Attribute::NoAlias); + F->removeParamAttr(ArgNo, Attribute::StructRet); + F->addParamAttr(ArgNo, Attribute::NoAlias); for (Use &U : F->uses()) { CallSite CS(U.getUser()); - CS.removeAttribute(ArgNo + 1, Attribute::StructRet); - CS.addAttribute(ArgNo + 1, Attribute::NoAlias); + CS.removeParamAttr(ArgNo, Attribute::StructRet); + CS.addParamAttr(ArgNo, Attribute::NoAlias); } } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 031c3d8a9ebe..28cc81c76d4f 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -835,7 +835,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // pointers. for (Function *F : SCCNodes) { // Already noalias. 
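Editor's note: the FunctionAttrs hunk below switches from raw attribute indices to the dedicated return-value accessors. The sketch that follows mirrors that usage; markNoAliasReturn is an invented name for illustration only.

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Type.h"

    // Query and set noalias on the return slot through Function methods
    // instead of the old doesNotAlias(0) / index-0 convention.
    static bool markNoAliasReturn(llvm::Function &F) {
      if (!F.getReturnType()->isPointerTy() || F.returnDoesNotAlias())
        return false;
      F.setReturnDoesNotAlias();
      return true;
    }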
- if (F->doesNotAlias(0)) + if (F->returnDoesNotAlias()) continue; // We can infer and propagate function attributes only when we know that the @@ -855,11 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { bool MadeChange = false; for (Function *F : SCCNodes) { - if (F->doesNotAlias(AttributeList::ReturnIndex) || + if (F->returnDoesNotAlias() || !F->getReturnType()->isPointerTy()) continue; - F->setDoesNotAlias(AttributeList::ReturnIndex); + F->setReturnDoesNotAlias(); ++NumNoAlias; MadeChange = true; } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 1bb9d654ec1c..2db47b3b5622 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -337,6 +337,16 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) { if (F->hasAddressTaken()) return nullptr; + // Let inliner handle it + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return nullptr; + + if (F->hasFnAttribute(Attribute::NoInline)) + return nullptr; + + if (PSI->isFunctionEntryCold(F)) + return nullptr; + std::unique_ptr<FunctionOutliningInfo> OutliningInfo = computeOutliningInfo(F); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e9286b1bf175..4fd90d78a63b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3845,7 +3845,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && isKnownNonNullAt(V, CS.getInstruction(), &DT)) - Indices.push_back(ArgNo + 1); + Indices.push_back(ArgNo + AttributeList::FirstArgIndex); ArgNo++; } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 4e454f0c95b6..8786781933ea 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass { MDNode *ColdCallWeights; DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; - AttributeList ReadOnlyNoneAttrs; + AttrBuilder ReadOnlyNoneAttrs; bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); @@ -544,16 +544,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, NewF->copyAttributesFrom(F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - F->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeAttributes( - AttributeList::FunctionIndex, - AttributeList().addAttribute(*Ctx, AttributeList::FunctionIndex, - "split-stack")); + NewF->removeAttributes(AttributeList::FunctionIndex, + AttrBuilder().addAttribute("split-stack")); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); @@ -629,16 +625,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } 
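Editor's note: several sanitizer hunks in this commit replace hand-built AttributeList temporaries with an AttrBuilder passed straight to removeAttributes. A minimal sketch of that pattern, using only calls that appear in the hunks (the helper name is invented):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Collect the attributes to drop in an AttrBuilder and strip them from the
    // function index in a single call.
    static void dropMemoryAttrs(llvm::Function &F) {
      llvm::AttrBuilder B;
      B.addAttribute(llvm::Attribute::ReadOnly)
          .addAttribute(llvm::Attribute::ReadNone);
      F.removeAttributes(llvm::AttributeList::FunctionIndex, B);
    }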
DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); @@ -652,7 +648,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanSetLabelFn = Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); if (Function *F = dyn_cast<Function>(DFSanSetLabelFn)) { - F->addAttribute(1, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); } DFSanNonzeroLabelFn = Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); @@ -698,9 +694,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - ReadOnlyNoneAttrs = AttributeList::get(*Ctx, AttributeList::FunctionIndex, B); + ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::ReadNone); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. @@ -722,9 +717,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->copyAttributesFrom(&F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -989,8 +982,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { if (AvoidNewBlocks) { CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); CCS.Block = Pos->getParent(); CCS.Shadow = Call; @@ -1002,8 +995,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { IRBuilder<> ThenIRB(BI); CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index d91ac6ac7883..9a82532d7703 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -241,7 +241,7 @@ static Constant *getOrInsertValueProfilingCall(Module &M, if (Function *FunRes = dyn_cast<Function>(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) - FunRes->addAttribute(3, AK); + FunRes->addParamAttr(2, AK); } return Res; } @@ -292,7 +292,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { 
Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); } if (auto AK = TLI->getExtAttrForI32Param(false)) - Call->addAttribute(3, AK); + Call->addParamAttr(2, AK); Ind->replaceAllUsesWith(Call); Ind->eraseFromParent(); } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3e480a6df446..15333a5317dd 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2607,10 +2607,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttributes(AttributeList::FunctionIndex, - AttributeList::get(Func->getContext(), - AttributeList::FunctionIndex, - B)); + Func->removeAttributes(AttributeList::FunctionIndex, B); } maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); @@ -3659,9 +3656,7 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B)); + F.removeAttributes(AttributeList::FunctionIndex, B); return Visitor.runOnFunction(); } diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index c541fa4c8bee..cb3b5757f8d0 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -163,7 +163,7 @@ private: AttributeList Attr = AttributeList().addAttribute( C, AttributeList::FunctionIndex, Attribute::NoUnwind); - Attr = Attr.addAttribute(C, 1, Attribute::NoCapture); + Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture); FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index dc864f48bf1f..3f1a77b49a44 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -318,7 +318,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) - Indices.push_back(ArgNo + 1); + Indices.push_back(ArgNo + AttributeList::FirstArgIndex); ArgNo++; } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index 48eda09c463e..198d2b2b024f 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -613,16 +613,16 @@ bool GuardWideningImpl::combineRangeChecks( // We have a series of f+1 checks as: // // I+k_0 u< L ... Chk_0 - // I_k_1 u< L ... Chk_1 + // I+k_1 u< L ... Chk_1 // ... - // I_k_f u< L ... Chk_(f+1) + // I+k_f u< L ... Chk_f // - // with forall i in [0,f): k_f-k_i u< k_f-k_0 ... Precond_0 + // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 // k_f != k_0 ... 
Precond_2 // // Claim: - // Chk_0 AND Chk_(f+1) implies all the other checks + // Chk_0 AND Chk_f implies all the other checks // // Informal proof sketch: // diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 73e8ce0e1d93..3151ccd279c4 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -29,6 +30,21 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); +/// This function deletes dead loops. The caller of this function needs to +/// guarantee that the loop is infact dead. Here we handle two kinds of dead +/// loop. The first kind (\p isLoopDead) is where only invariant values from +/// within the loop are used outside of it. The second kind (\p +/// isLoopNeverExecuted) is where the loop is provably never executed. We can +/// always remove never executed loops since they will not cause any +/// difference to program behaviour. +/// +/// This also updates the relevant analysis information in \p DT, \p SE, and \p +/// LI. It also updates the loop PM if an updater struct is provided. +// TODO: This function will be used by loop-simplifyCFG as well. So, move this +// to LoopUtils.cpp +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -84,12 +100,44 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, return true; } +/// This function returns true if there is no viable path from the +/// entry block to the header of \p L. Right now, it only does +/// a local search to save compile time. +static bool isLoopNeverExecuted(Loop *L) { + using namespace PatternMatch; + + auto *Preheader = L->getLoopPreheader(); + // TODO: We can relax this constraint, since we just need a loop + // predecessor. + assert(Preheader && "Needs preheader!"); + + if (Preheader == &Preheader->getParent()->getEntryBlock()) + return false; + // All predecessors of the preheader should have a constant conditional + // branch, with the loop's preheader as not-taken. + for (auto *Pred: predecessors(Preheader)) { + BasicBlock *Taken, *NotTaken; + ConstantInt *Cond; + if (!match(Pred->getTerminator(), + m_Br(m_ConstantInt(Cond), Taken, NotTaken))) + return false; + if (!Cond->getZExtValue()) + std::swap(Taken, NotTaken); + if (Taken == Preheader) + return false; + } + assert(!pred_empty(Preheader) && + "Preheader should have predecessors at this point!"); + // All the predecessors have the loop preheader as not-taken target. + return true; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of /// the program other than finite running time. This never removes a loop that -/// might be infinite, as doing so could change the halting/non-halting nature -/// of a program. +/// might be infinite (unless it is never executed), as doing so could change +/// the halting/non-halting nature of a program. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. 
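Editor's note: the new isLoopNeverExecuted path in LoopDeletion targets loops whose preheader is only reachable over constant-false branches. A hedged C++ illustration of that shape (names invented; after constant folding the guarding branch condition becomes a ConstantInt, which is what the check inspects):

    // The loop body is provably never executed, so LoopDeletion may remove it
    // outright, regardless of what the body would do.
    void zeroIfEnabled(int *A, int N) {
      constexpr bool Enabled = false;   // folds to a constant branch condition
      if (Enabled)                      // preheader only reachable when true
        for (int I = 0; I < N; ++I)     // dead loop
          A[I] = 0;
    }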
@@ -97,9 +145,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, /// \returns true if any changes were made. This may mutate the loop even if it /// is unable to delete it due to hoisting trivially loop invariant /// instructions out of the loop. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); @@ -119,6 +164,17 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (L->begin() != L->end()) return false; + + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + + if (ExitBlock && isLoopNeverExecuted(L)) { + deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + return true; + } + + // The remaining checks below are for a loop being dead because all statements + // in the loop are invariant. SmallVector<BasicBlock *, 4> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -126,7 +182,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (!ExitBlock) return false; @@ -141,6 +196,19 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (isa<SCEVCouldNotCompute>(S)) return Changed; + deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + + return true; +} + +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // @@ -156,17 +224,29 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // to determine what it needs to clean up. SE.forgetLoop(L); + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + // Connect the preheader directly to the exit block. - TerminatorInst *TI = Preheader->getTerminator(); - TI->replaceUsesOfWith(L->getHeader(), ExitBlock); + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of loop deletion pass. + Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - // Rewrite phis in the exit block to get their inputs from - // the preheader instead of the exiting block. + SmallVector<BasicBlock *, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. 
BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(ExitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + if (LoopIsNeverExecuted) + P->setIncomingValue(j, UndefValue::get(P->getType())); P->setIncomingBlock(j, Preheader); for (unsigned i = 1; i < ExitingBlocks.size(); ++i) P->removeIncomingValue(ExitingBlocks[i]); @@ -211,9 +291,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // The last step is to update LoopInfo now that we've eliminated this loop. LI.markAsRemoved(L); - ++NumDeleted; - - return true; } PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, @@ -254,7 +331,6 @@ Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); } bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { if (skipLoop(L)) return false; - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 162d91beae76..62b5d80d611b 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -1440,18 +1440,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands - // were constant. - // This is really shorthand for "this phi cannot cycle due to forward - // propagation", as any - // change in value of the phi is guaranteed not to later change the value of - // the phi. + // were constant. This is really shorthand for "this phi cannot cycle due + // to forward propagation", as any change in value of the phi is guaranteed + // not to later change the value of the phi. // IE it can't be v = phi(undef, v+1) bool AllConstant = true; auto *E = cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. - - // See if all arguaments are the same. + // See if all arguments are the same. // We track if any were undef because they need special handling. 
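Editor's note: the reworded NewGVN comment above mentions the cycle "v = phi(undef, v+1)" that the AllConstant shorthand rules out. A hedged C++ analogue of such a loop-carried value (illustrative only; the incoming constant stands in for the undef edge):

    // V is defined around the back edge in terms of itself, so its value is
    // not a fixed constant even though its non-phi operands are.
    int loopCarried(int N) {
      int V = 0;                  // preheader incoming value
      for (int I = 0; I < N; ++I)
        V = V + 1;                // back-edge incoming value depends on V
      return V;
    }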
bool HasUndef = false; auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index c11247c06b85..77b2bd84f9b6 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2286,12 +2286,11 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, if (AH.getDereferenceableOrNullBytes(Index)) R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, AH.getDereferenceableOrNullBytes(Index))); - if (AH.doesNotAlias(Index)) + if (AH.getAttributes().hasAttribute(Index, Attribute::NoAlias)) R.addAttribute(Attribute::NoAlias); if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributes( - Ctx, Index, AttributeList::get(Ctx, Index, R))); + AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R)); } void @@ -2300,7 +2299,8 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { for (Argument &A : F.args()) if (isa<PointerType>(A.getType())) - RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1); + RemoveNonValidAttrAtIndex(Ctx, F, + A.getArgNo() + AttributeList::FirstArgIndex); if (isa<PointerType>(F.getReturnType())) RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); @@ -2336,7 +2336,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { if (CallSite CS = CallSite(&I)) { for (int i = 0, e = CS.arg_size(); i != e; i++) if (isa<PointerType>(CS.getArgument(i)->getType())) - RemoveNonValidAttrAtIndex(Ctx, CS, i + 1); + RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex); if (isa<PointerType>(CS.getType())) RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex); } diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp index a7c308b59877..a0fc966cee2c 100644 --- a/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -208,47 +208,6 @@ bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) { return false; } -static unsigned ComputeSpeculationCost(const Instruction *I, - const TargetTransformInfo &TTI) { - switch (Operator::getOpcode(I)) { - case Instruction::GetElementPtr: - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Select: - case Instruction::Shl: - case Instruction::Sub: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Xor: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::Call: - case Instruction::BitCast: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::AddrSpaceCast: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPExt: - case Instruction::FPTrunc: - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::ICmp: - case Instruction::FCmp: - return TTI.getUserCost(I); - - default: - return UINT_MAX; // Disallow anything not whitelisted. 
- } -} - bool SpeculativeExecutionPass::considerHoistingFromTo( BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallSet<const Instruction *, 8> NotHoisted; @@ -264,7 +223,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( unsigned TotalSpeculationCost = 0; for (auto& I : FromBlock) { - const unsigned Cost = ComputeSpeculationCost(&I, *TTI); + const unsigned Cost = TTI->getUserCost(&I); if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { TotalSpeculationCost += Cost; diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 6cd9f1614991..1956697ccb8b 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,7 +58,7 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; - F.setOnlyAccessesArgMemory (); + F.setOnlyAccessesArgMemory(); ++NumArgMemOnly; return true; } @@ -71,37 +71,36 @@ static bool setDoesNotThrow(Function &F) { return true; } -static bool setDoesNotCapture(Function &F, unsigned n) { - if (F.doesNotCapture(n)) +static bool setRetDoesNotAlias(Function &F) { + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias)) return false; - F.setDoesNotCapture(n); - ++NumNoCapture; + F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + ++NumNoAlias; return true; } -static bool setOnlyReadsMemory(Function &F, unsigned n) { - if (F.onlyReadsMemory(n)) +static bool setDoesNotCapture(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) return false; - F.setOnlyReadsMemory(n); - ++NumReadOnlyArg; + F.addParamAttr(ArgNo, Attribute::NoCapture); + ++NumNoCapture; return true; } -static bool setDoesNotAlias(Function &F, unsigned n) { - if (F.doesNotAlias(n)) +static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; - F.setDoesNotAlias(n); - ++NumNoAlias; + F.addParamAttr(ArgNo, Attribute::ReadOnly); + ++NumReadOnlyArg; return true; } -static bool setNonNull(Function &F, unsigned n) { - assert( - (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) && - "nonnull applies only to pointers"); - if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) +static bool setRetNonNull(Function &F) { + assert(F.getReturnType()->isPointerTy() && + "nonnull applies only to pointers"); + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull)) return false; - F.addAttribute(n, Attribute::NonNull); + F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); ++NumNonNull; return true; } @@ -116,7 +115,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strlen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: case LibFunc_strrchr: @@ -131,8 +130,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strtold: case LibFunc_strtoull: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_strcpy: case LibFunc_stpcpy: @@ -141,14 +140,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncpy: case 
LibFunc_stpncpy: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 @@ -159,84 +158,84 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncasecmp: // Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strdup: case LibFunc_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat: case LibFunc_statvfs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_sprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_setitimer: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. 
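Editor's note: the BuildLibCalls churn above and below is a renumbering — the helpers now take a 0-based parameter index and go through addParamAttr. As a sketch of the new convention (helper name invented), a memcpy-style prototype (dst, src, len) has its source at parameter 1:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Mark the source operand of a memcpy-like libc function as nocapture and
    // readonly, using 0-based parameter numbering.
    static void annotateMemcpySource(llvm::Function &F) {
      F.addParamAttr(1, llvm::Attribute::NoCapture);
      F.addParamAttr(1, llvm::Attribute::ReadOnly);
    }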
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_malloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_memcmp: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_memchr: case LibFunc_memrchr: @@ -247,100 +246,100 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: case LibFunc_mempcpy: case LibFunc_memccpy: case LibFunc_memmove: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_memalign: - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_mkdir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_mktime: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_rewind: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_rename: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_readlink: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. 
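Editor's note: under the old helpers, index 0 meant the return value, which is why setDoesNotAlias(F, 0) shows up on allocation-like routines above. The new code spells that out with setRetDoesNotAlias; the sketch below mirrors its body (without the statistics counter) under an invented name.

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    // Give malloc/calloc/strdup-style functions a noalias return value,
    // addressed through ReturnIndex rather than a magic index of 0.
    static bool markNoAliasResult(llvm::Function &F) {
      if (F.hasAttribute(llvm::AttributeList::ReturnIndex,
                         llvm::Attribute::NoAlias))
        return false;
      F.addAttribute(llvm::AttributeList::ReturnIndex, llvm::Attribute::NoAlias);
      return true;
    }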
- Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_bzero: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_calloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_chmod: case LibFunc_chown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_atoi: case LibFunc_atol: @@ -348,26 +347,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_atoll: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_access: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fdopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_feof: case LibFunc_free: @@ -384,11 +383,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_funlockfile: case LibFunc_ftrylockfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_ferror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); return Changed; case LibFunc_fputc: @@ -398,51 +397,51 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_frexpl: case LibFunc_fstatvfs: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_fgets: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 3); + Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_fread: Changed |= setDoesNotThrow(F); - Changed |= 
setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_fwrite: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? return Changed; case LibFunc_fputs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fgetpos: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_getc: case LibFunc_getlogin_r: case LibFunc_getc_unlocked: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getenv: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_gets: case LibFunc_getchar: @@ -450,132 +449,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_getitimer: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getpwnam: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ungetc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_uname: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_unlink: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_unsetenv: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_utime: case LibFunc_utimes: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_pclose: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_vscanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vsscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_vfscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_vprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vsnprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. 
- Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_htonl: case LibFunc_htons: @@ -586,93 +585,93 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_lstat: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: case LibFunc_dunder_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_under_IO_getc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_under_IO_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_dunder_isoc99_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_isoc99_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fopen64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= 
setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_Znwj: // new(unsigned int) case LibFunc_Znwm: // new(unsigned long) @@ -683,17 +682,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_msvc_new_array_int: // new[](unsigned int) case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) // Operator new always returns a nonnull noalias pointer - Changed |= setNonNull(F, AttributeList::ReturnIndex); - Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex); + Changed |= setRetNonNull(F); + Changed |= setRetDoesNotAlias(F); return Changed; //TODO: add LibFunc entries for: //case LibFunc_memset_pattern4: //case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: @@ -889,7 +888,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. + CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 85db734fb182..391fde3b0b01 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -512,6 +512,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *Latch = L->getLoopLatch(); + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be the single exit block out of the loop. This needs + // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. 
+ BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); + assert( + (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) && + "one of the loop latch successors should be " + "the exit block!"); + // Avoid warning of unused `LatchBR` variable in release builds. + (void)LatchBR; // Loop structure is the following: // // PreHeader diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 2c1c30463a23..9e71d746de34 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -537,7 +537,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::NoCapture); + CI->addParamAttr(0, Attribute::NoCapture); } return nullptr; diff --git a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index f2d3f3f0ce63..b673399e428f 100644 --- a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -78,10 +78,10 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x ; SKX-LABEL: test_gather_2f64 ; SKX: Found an estimated cost of 7 {{.*}}.gather -%res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ret <2 x double> %res } -declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { @@ -94,7 +94,7 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SKX-LABEL: test_gather_4i32 ; SKX: Found an estimated cost of 6 {{.*}}.gather -%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ret <4 x i32> %res } @@ -109,10 +109,10 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SKX-LABEL: test_gather_4i32_const_mask ; SKX: Found an estimated cost of 6 {{.*}}.gather -%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) +%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) ret <4 x i32> %res } -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0) define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { @@ -128,7 +128,7 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 
true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } @@ -146,7 +146,7 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ret <16 x float>%res } @@ -164,7 +164,7 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ret <16 x float>%res } @@ -185,7 +185,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } @@ -204,7 +204,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind %imask = bitcast i16 %mask to <16 x i1> - call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ret void } @@ -218,11 +218,11 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SKX-LABEL: test_scatter_8i32 ; SKX: Found an estimated cost of 10 {{.*}}.scatter - call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ret void } -declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask) +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask) define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; AVX2-LABEL: test_scatter_4i32 @@ -234,7 +234,7 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SKX-LABEL: test_scatter_4i32 ; SKX: Found an estimated cost of 6 {{.*}}.scatter - call void 
@llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ret void } @@ -252,7 +252,7 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ret <4 x float>%res } @@ -270,14 +270,14 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) ret <4 x float>%res } -declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> ) -declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask) -declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask) -declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> ) +declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask) +declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask) +declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>) declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>) declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) diff --git a/test/Analysis/CostModel/X86/vector_gep.ll b/test/Analysis/CostModel/X86/vector_gep.ll index e49f25871d66..17f70dfc7a7c 100644 --- a/test/Analysis/CostModel/X86/vector_gep.ll +++ b/test/Analysis/CostModel/X86/vector_gep.ll @@ -3,7 +3,7 @@ %struct.S = type { [1000 x i32] } -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %temp = insertelement <4 x i64> undef, i64 %base, i32 0 @@ -12,6 +12,6 @@ define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector - %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) ret <4 x i32> %res } diff --git a/test/Assembler/auto_upgrade_intrinsics.ll 
b/test/Assembler/auto_upgrade_intrinsics.ll index d00fe5882bcd..87ad371deaa5 100644 --- a/test/Assembler/auto_upgrade_intrinsics.ll +++ b/test/Assembler/auto_upgrade_intrinsics.ll @@ -85,6 +85,23 @@ define void @tests.masked.store(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> ret void } +declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) + +define <2 x double> @tests.masked.gather(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %passthru) { +; CHECK-LABEL: @tests.masked.gather( +; CHECK: @llvm.masked.gather.v2f64.v2p0f64 + %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru) + ret <2 x double> %res +} + +declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) + +define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val) { +; CHECK-LABEL: @tests.masked.scatter( +; CHECK: @llvm.masked.scatter.v2f64.v2p0f64 + call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask) + ret void +} declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind diff --git a/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll new file mode 100644 index 000000000000..a95e9f828b61 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -0,0 +1,74 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s + +; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { +; IR: alloca [5 x i32] +; ASM-LABEL: {{^}}promote_alloca_shaders: +; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only) + +define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { +entry: + %stack = alloca [5 x i32], align 4 + %tmp0 = load i32, i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 + %tmp2 = load i32, i32* %arrayidx4, align 4 + store i32 %tmp2, i32 addrspace(1)* %out, align 4 + %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 + %tmp3 = load i32, i32* %arrayidx5 + %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 + store i32 %tmp3, i32 addrspace(1)* %arrayidx6 + ret void +} + +; OPT-LABEL: @promote_to_vector_call_c( +; OPT-NOT: alloca +; OPT: extractelement <2 x i32> %{{[0-9]+}}, i32 %in +; ASM-NOT: LDSByteSize +define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in + %tmp4 = load i32, i32* %tmp3 + %tmp5 = load volatile i32, i32 
addrspace(1)* undef + %tmp6 = add i32 %tmp4, %tmp5 + store i32 %tmp6, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @no_promote_to_lds_c( +; OPT: alloca +; ASM-NOT: LDSByteSize +define void @no_promote_to_lds(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { +entry: + %stack = alloca [5 x i32], align 4 + %0 = load i32, i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 + %2 = load i32, i32* %arrayidx10, align 4 + store i32 %2, i32 addrspace(1)* %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 + %3 = load i32, i32* %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 + store i32 %3, i32 addrspace(1)* %arrayidx13 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll b/test/CodeGen/AMDGPU/promote-alloca-shaders.ll deleted file mode 100644 index d40fca9f4fd5..000000000000 --- a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s - -; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { -; IR: alloca [5 x i32] -; ASM-LABEL: {{^}}promote_alloca_shaders: -; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only) - -define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { -entry: - %stack = alloca [5 x i32], align 4 - %tmp0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 - store i32 4, i32* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx4, align 4 - store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx5 - %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 - store i32 %tmp3, i32 addrspace(1)* %arrayidx6 - ret void -} - -attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index 699ef6e92a4f..bef7bbe01bff 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -199,7 +199,8 @@ ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s 
--check-prefix=NO-STRICT-ALIGN ; ARMv6k -; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s 2> %t | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: FileCheck %s < %t --allow-empty --check-prefix=CPU-SUPPORTED ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; ARMv6m @@ -222,6 +223,8 @@ ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; CPU-SUPPORTED-NOT: is not a recognized processor for this target + ; XSCALE: .eabi_attribute 6, 5 ; XSCALE: .eabi_attribute 8, 1 ; XSCALE: .eabi_attribute 9, 1 diff --git a/test/CodeGen/ARM/load-arm.ll b/test/CodeGen/ARM/load-arm.ll new file mode 100644 index 000000000000..3807424ece81 --- /dev/null +++ b/test/CodeGen/ARM/load-arm.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck %s + +; We ended up feeding a deleted node back to TableGen when we converted "Off * +; 410" into "(Off * 205) << 1", where the multiplication already existed in the +; DAG. + +; CHECK-LABEL: addrmode_cse_mutation: +; CHECK: {{mul|muls}} [[OFFSET:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: {{ldrb|ldrb.w}} {{r[0-9]+}}, [r0, [[OFFSET]], lsl #3] +define i32 @addrmode_cse_mutation(i8* %base, i32 %count) { + %offset = mul i32 %count, 277288 + %ptr = getelementptr i8, i8* %base, i32 %offset + %val = load volatile i8, i8* %ptr + %res = mul i32 %count, 34661 + ret i32 %res +} + +; CHECK-LABEL: addrmode_cse_multi_use: +; CHECK-NOT: {{ldrb|ldrb.w}} {{r[0-9]+}}, [{{r[0-9]+}}, {{r[0-9]+}}, lsl #3] +define i32 @addrmode_cse_multi_use(i8* %base, i32 %count) { + %offset = mul i32 %count, 277288 + %ptr = getelementptr i8, i8* %base, i32 %offset + %val = load volatile i8, i8* %ptr + %res = mul i32 %count, 34661 + %res.1 = add i32 %res, %offset + ret i32 %res.1 +} diff --git a/test/CodeGen/AVR/brind.ll b/test/CodeGen/AVR/brind.ll index f92038d10829..ec8262e84a95 100644 --- a/test/CodeGen/AVR/brind.ll +++ b/test/CodeGen/AVR/brind.ll @@ -4,8 +4,6 @@ define i8 @brind(i8 %p) { ; CHECK-LABEL: brind: -; CHECK: ld r30 -; CHECK: ldd r31 ; CHECK: ijmp entry: %idxprom = sext i8 %p to i16 diff --git a/test/CodeGen/AVR/dynalloca.ll b/test/CodeGen/AVR/dynalloca.ll index 13f503015f9f..6aa776e2de6f 100644 --- a/test/CodeGen/AVR/dynalloca.ll +++ b/test/CodeGen/AVR/dynalloca.ll @@ -69,9 +69,9 @@ define void @dynalloca2(i16 %x) { ; SP restore ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 62, r7 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: out 61, r6 %vla = alloca i16, i16 %x call void @foo2(i16* %vla, i64 0, i64 0, i64 0) ret void diff --git a/test/CodeGen/AVR/inline-asm/inline-asm.ll b/test/CodeGen/AVR/inline-asm/inline-asm.ll index 88d0c3af2e88..26f90806781e 100644 --- a/test/CodeGen/AVR/inline-asm/inline-asm.ll +++ b/test/CodeGen/AVR/inline-asm/inline-asm.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=avr -mattr=movw -no-integrated-as | FileCheck %s +; XFAIL: * ; CHECK-LABEL: no_operands: define void @no_operands() { diff --git a/test/CodeGen/BPF/reloc.ll 
b/test/CodeGen/BPF/reloc.ll new file mode 100644 index 000000000000..75dbebf311e3 --- /dev/null +++ b/test/CodeGen/BPF/reloc.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=bpfel -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +%struct.bpf_context = type { i64, i64, i64, i64, i64, i64, i64 } +%struct.sk_buff = type { i64, i64, i64, i64, i64, i64, i64 } +%struct.net_device = type { i64, i64, i64, i64, i64, i64, i64 } + +@bpf_prog1.devname = private unnamed_addr constant [3 x i8] c"lo\00", align 1 +@bpf_prog1.fmt = private unnamed_addr constant [15 x i8] c"skb %x dev %x\0A\00", align 1 + +; Function Attrs: norecurse +define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/net/netif_receive_skb" { + %devname = alloca [3 x i8], align 1 + %fmt = alloca [15 x i8], align 1 + %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false) + %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0 + %3 = load i64, i64* %2, align 8 + %4 = inttoptr i64 %3 to %struct.sk_buff* + %5 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %4, i64 0, i32 2 + %6 = bitcast i64* %5 to i8* + %7 = call i8* inttoptr (i64 4 to i8* (i8*)*)(i8* %6) #1 + %8 = call i32 inttoptr (i64 9 to i32 (i8*, i8*, i32)*)(i8* %7, i8* %1, i32 2) #1 + %9 = icmp eq i32 %8, 0 + br i1 %9, label %10, label %13 + +; <label>:10 ; preds = %0 + %11 = getelementptr inbounds [15 x i8], [15 x i8]* %fmt, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %11, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @bpf_prog1.fmt, i64 0, i64 0), i64 15, i32 1, i1 false) + %12 = call i32 (i8*, i32, ...) inttoptr (i64 11 to i32 (i8*, i32, ...)*)(i8* %11, i32 15, %struct.sk_buff* %4, i8* %7) #1 + br label %13 + +; <label>:13 ; preds = %10, %0 + ret i32 0 + +; CHECK-RELOC: file format ELF64-BPF +; CHECK-RELOC: RELOCATION RECORDS FOR [.rel.eh_frame]: +; CHECK-RELOC: R_BPF_64_64 events/net/netif_receive_skb +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #1 + +attributes #0 = { norecurse } diff --git a/test/CodeGen/Hexagon/adjust-latency-stackST.ll b/test/CodeGen/Hexagon/adjust-latency-stackST.ll new file mode 100644 index 000000000000..915db91635f1 --- /dev/null +++ b/test/CodeGen/Hexagon/adjust-latency-stackST.ll @@ -0,0 +1,81 @@ +; RUN: llc -march=hexagon -disable-post-ra < %s | FileCheck %s + +; Make sure that if there's only one store to the stack, it gets packetized +; with allocframe as there's a latency of 2 cycles between allocframe and +; the following store if not in the same packet. 
+ +; CHECK: { +; CHECK: memd(r29 +; CHECK-NOT: { +; CHECK: allocframe +; CHECK: } +; CHECK: = memw(gp+#G) + +%struct.0 = type { %struct.0*, i32, %struct.2 } +%struct.1 = type { i32, i32, [31 x i8] } +%struct.2 = type { %struct.1 } + +@G = common global %struct.0* null, align 4 + +define i32 @test(%struct.0* nocapture %a0) #0 { +b1: + %v2 = alloca %struct.0*, align 4 + %v3 = bitcast %struct.0** %v2 to i8* + %v4 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 0 + %v5 = load %struct.0*, %struct.0** %v4, align 4 + store %struct.0* %v5, %struct.0** %v2, align 4 + %v6 = bitcast %struct.0* %v5 to i8* + %v7 = load i8*, i8** bitcast (%struct.0** @G to i8**), align 4 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %v6, i8* %v7, i32 48, i32 4, i1 false) + %v8 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 2, i32 0, i32 1 + store i32 5, i32* %v8, align 4 + %v9 = getelementptr inbounds %struct.0, %struct.0* %v5, i32 0, i32 2, i32 0, i32 1 + store i32 5, i32* %v9, align 4 + %v10 = bitcast %struct.0* %a0 to i32* + %v11 = load i32, i32* %v10, align 4 + %v12 = bitcast %struct.0* %v5 to i32* + store i32 %v11, i32* %v12, align 4 + %v13 = call i32 bitcast (i32 (...)* @f0 to i32 (%struct.0**)*)(%struct.0** nonnull %v2) + %v14 = load %struct.0*, %struct.0** %v2, align 4 + %v15 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 1 + %v16 = load i32, i32* %v15, align 4 + %v17 = icmp eq i32 %v16, 0 + br i1 %v17, label %b18, label %b32 + +b18: ; preds = %b1 + %v19 = bitcast %struct.0** %v2 to i32** + %v20 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 2, i32 0, i32 1 + store i32 6, i32* %v20, align 4 + %v21 = getelementptr inbounds %struct.0, %struct.0* %a0, i32 0, i32 2, i32 0, i32 0 + %v22 = load i32, i32* %v21, align 4 + %v23 = getelementptr inbounds %struct.0, %struct.0* %v14, i32 0, i32 2, i32 0, i32 0 + %v24 = call i32 bitcast (i32 (...)* @f1 to i32 (i32, i32*)*)(i32 %v22, i32* %v23) + %v25 = load i32*, i32** bitcast (%struct.0** @G to i32**), align 4 + %v26 = load i32, i32* %v25, align 4 + %v27 = load i32*, i32** %v19, align 4 + store i32 %v26, i32* %v27, align 4 + %v28 = load %struct.0*, %struct.0** %v2, align 4 + %v29 = getelementptr inbounds %struct.0, %struct.0* %v28, i32 0, i32 2, i32 0, i32 1 + %v30 = load i32, i32* %v29, align 4 + %v31 = call i32 bitcast (i32 (...)* @f2 to i32 (i32, i32, i32*)*)(i32 %v30, i32 10, i32* %v29) + br label %b36 + +b32: ; preds = %b1 + %v33 = bitcast %struct.0* %a0 to i8** + %v34 = load i8*, i8** %v33, align 4 + %v35 = bitcast %struct.0* %a0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %v35, i8* %v34, i32 48, i32 4, i1 false) + br label %b36 + +b36: ; preds = %b32, %b18 + ret i32 undef +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #1 + +declare i32 @f0(...) #0 +declare i32 @f1(...) #0 +declare i32 @f2(...) 
#0 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } diff --git a/test/CodeGen/Hexagon/multi-cycle.ll b/test/CodeGen/Hexagon/multi-cycle.ll new file mode 100644 index 000000000000..fc021821af38 --- /dev/null +++ b/test/CodeGen/Hexagon/multi-cycle.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=hexagon -O2 < %s | FileCheck %s + +; CHECK: v{{[0-9]+}}.h{{ *}}={{ *}}vadd(v{{[0-9]+}}.h,v{{[0-9]+}}.h) +; CHECK: } +; CHECK: { +; CHECK: v{{[0-9]+}}{{ *}}={{ *}}valign(v{{[0-9]+}},v{{[0-9]+}},r{{[0-9]+}}) +; CHECK: } +; CHECK: { +; CHECK: v{{[0-9]+}}{{ *}}={{ *}}valign(v{{[0-9]+}},v{{[0-9]+}},r{{[0-9]+}}) + +target triple = "hexagon" + +@ZERO = global <16 x i32> zeroinitializer, align 64 + +define void @fred(i16* nocapture readonly %a0, i32 %a1, i32 %a2, i16* nocapture %a3) #0 { +b4: + %v5 = bitcast i16* %a0 to <16 x i32>* + %v6 = getelementptr inbounds i16, i16* %a0, i32 %a1 + %v7 = bitcast i16* %v6 to <16 x i32>* + %v8 = mul nsw i32 %a1, 2 + %v9 = getelementptr inbounds i16, i16* %a0, i32 %v8 + %v10 = bitcast i16* %v9 to <16 x i32>* + %v11 = load <16 x i32>, <16 x i32>* %v5, align 64, !tbaa !1 + %v12 = load <16 x i32>, <16 x i32>* %v7, align 64, !tbaa !1 + %v13 = load <16 x i32>, <16 x i32>* %v10, align 64, !tbaa !1 + %v14 = load <16 x i32>, <16 x i32>* @ZERO, align 64, !tbaa !1 + %v15 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v14, <16 x i32> %v14) + %v16 = sdiv i32 %a2, 32 + %v17 = icmp sgt i32 %a2, 31 + br i1 %v17, label %b18, label %b66 + +b18: ; preds = %b4 + %v19 = add i32 %v8, 32 + %v20 = add i32 %a1, 32 + %v21 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v12, <16 x i32> %v12) + %v22 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v11, <16 x i32> %v13) + %v23 = getelementptr inbounds i16, i16* %a0, i32 %v19 + %v24 = getelementptr inbounds i16, i16* %a0, i32 %v20 + %v25 = getelementptr inbounds i16, i16* %a0, i32 32 + %v26 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v11, <16 x i32> %v13) + %v27 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v22, <16 x i32> %v21) + %v28 = bitcast i16* %v23 to <16 x i32>* + %v29 = bitcast i16* %v24 to <16 x i32>* + %v30 = bitcast i16* %v25 to <16 x i32>* + %v31 = bitcast i16* %a3 to <16 x i32>* + br label %b32 + +b32: ; preds = %b32, %b18 + %v33 = phi i32 [ 0, %b18 ], [ %v63, %b32 ] + %v34 = phi <16 x i32>* [ %v31, %b18 ], [ %v62, %b32 ] + %v35 = phi <16 x i32>* [ %v28, %b18 ], [ %v46, %b32 ] + %v36 = phi <16 x i32>* [ %v29, %b18 ], [ %v44, %b32 ] + %v37 = phi <16 x i32>* [ %v30, %b18 ], [ %v42, %b32 ] + %v38 = phi <16 x i32> [ %v15, %b18 ], [ %v39, %b32 ] + %v39 = phi <16 x i32> [ %v26, %b18 ], [ %v56, %b32 ] + %v40 = phi <16 x i32> [ %v27, %b18 ], [ %v51, %b32 ] + %v41 = phi <16 x i32> [ %v15, %b18 ], [ %v40, %b32 ] + %v42 = getelementptr inbounds <16 x i32>, <16 x i32>* %v37, i32 1 + %v43 = load <16 x i32>, <16 x i32>* %v37, align 64, !tbaa !1 + %v44 = getelementptr inbounds <16 x i32>, <16 x i32>* %v36, i32 1 + %v45 = load <16 x i32>, <16 x i32>* %v36, align 64, !tbaa !1 + %v46 = getelementptr inbounds <16 x i32>, <16 x i32>* %v35, i32 1 + %v47 = load <16 x i32>, <16 x i32>* %v35, align 64, !tbaa !1 + %v48 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v43, <16 x i32> %v47) + %v49 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v45, <16 x i32> %v45) + %v50 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v40, <16 x i32> %v41, i32 62) + %v51 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v48, <16 x i32> %v49) + %v52 = tail call <16 x i32> 
@llvm.hexagon.V6.valignb(<16 x i32> %v51, <16 x i32> %v40, i32 2) + %v53 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32> %v50, <16 x i32> %v52) + %v54 = getelementptr inbounds <16 x i32>, <16 x i32>* %v34, i32 1 + store <16 x i32> %v53, <16 x i32>* %v34, align 64, !tbaa !1 + %v55 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v39, <16 x i32> %v38, i32 62) + %v56 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %v43, <16 x i32> %v47) + %v57 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %v56, <16 x i32> %v39, i32 2) + %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v39, <16 x i32> %v39) + %v59 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v58, <16 x i32> %v55) + %v60 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %v59, <16 x i32> %v57) + %v61 = tail call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %v60) + %v62 = getelementptr inbounds <16 x i32>, <16 x i32>* %v34, i32 2 + store <16 x i32> %v61, <16 x i32>* %v54, align 64, !tbaa !1 + %v63 = add nsw i32 %v33, 1 + %v64 = icmp slt i32 %v63, %v16 + br i1 %v64, label %b32, label %b65 + +b65: ; preds = %b32 + br label %b66 + +b66: ; preds = %b65, %b4 + ret void +} + +declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1 +declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1 +declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1 +declare <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32>, <16 x i32>) #1 +declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #1 = { nounwind readnone } + +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/plt-rel.ll b/test/CodeGen/Hexagon/plt-rel.ll new file mode 100644 index 000000000000..1d38cf32b886 --- /dev/null +++ b/test/CodeGen/Hexagon/plt-rel.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=hexagon -relocation-model=pic -mattr=+long-calls < %s | FileCheck --check-prefix=CHECK-LONG %s +; RUN: llc -march=hexagon -relocation-model=pic < %s | FileCheck %s + +; CHECK-LONG: call ##_ZL13g_usr1_called@GDPLT +; CHECK-LONG-NOT: call _ZL13g_usr1_called@GDPLT +; CHECK: call _ZL13g_usr1_called@GDPLT +; CHECK-NOT: call ##_ZL13g_usr1_called@GDPLT + + +target triple = "hexagon" + +@_ZL13g_usr1_called = internal thread_local global i32 0, align 4 + +; Function Attrs: norecurse nounwind +define void @_Z14SigUsr1Handleri(i32) local_unnamed_addr #0 { +entry: + store volatile i32 1, i32* @_ZL13g_usr1_called, align 4 + ret void +} + +; Function Attrs: norecurse nounwind +define zeroext i1 @_Z27CheckForMonitorCancellationv() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @_ZL13g_usr1_called, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %return, label %if.then + +if.then: ; preds = %entry + store volatile i32 0, i32* @_ZL13g_usr1_called, align 4 + br label %return + +return: ; preds = %entry, %if.then + %.sink = phi i1 [ true, %if.then ], [ false, %entry ] + ret i1 %.sink +} + +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } diff --git a/test/CodeGen/PowerPC/shift_mask.ll b/test/CodeGen/PowerPC/shift_mask.ll index 91226a336721..e9ca9b0bdf02 100644 --- a/test/CodeGen/PowerPC/shift_mask.ll +++ b/test/CodeGen/PowerPC/shift_mask.ll @@ -49,8 +49,6 @@ define i64 @test003(i64 %a, i64 %b) { define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test010: ; CHECK: 
# BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslb 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -61,8 +59,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test011: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslh 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> @@ -73,10 +69,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test012: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vslw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> @@ -87,11 +79,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test013: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsld 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b, <i64 63, i64 63> @@ -148,8 +135,6 @@ define i64 @test103(i64 %a, i64 %b) { define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test110: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrb 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -160,8 +145,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test111: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrh 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> @@ -172,10 +155,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test112: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> @@ -186,11 +165,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test113: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrd 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b, <i64 63, i64 63> @@ -247,8 +221,6 @@ define i64 @test203(i64 %a, i64 %b) { define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test210: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisb 4, 7 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrab 2, 2, 3 ; CHECK-NEXT: blr %rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> @@ -259,8 +231,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) { define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) { 
; CHECK-LABEL: test211: ; CHECK: # BB#0: -; CHECK-NEXT: vspltish 4, 15 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrah 2, 2, 3 ; CHECK-NEXT: blr %rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> @@ -271,10 +241,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) { define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test212: ; CHECK: # BB#0: -; CHECK-NEXT: vspltisw 4, -16 -; CHECK-NEXT: vspltisw 5, 15 -; CHECK-NEXT: vsubuwm 4, 5, 4 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsraw 2, 2, 3 ; CHECK-NEXT: blr %rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31> @@ -285,11 +251,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test213: ; CHECK: # BB#0: -; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: xxswapd 36, 0 -; CHECK-NEXT: xxland 35, 35, 36 ; CHECK-NEXT: vsrad 2, 2, 3 ; CHECK-NEXT: blr %rem = and <2 x i64> %b, <i64 63, i64 63> diff --git a/test/CodeGen/X86/addcarry.ll b/test/CodeGen/X86/addcarry.ll index 6fc07cd84dea..5e95cd832789 100644 --- a/test/CodeGen/X86/addcarry.ll +++ b/test/CodeGen/X86/addcarry.ll @@ -190,9 +190,9 @@ entry: define i64 @shiftadd(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: shiftadd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: leaq (%rdx,%rcx), %rax ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: adcq $0, %rax +; CHECK-NEXT: adcq %rcx, %rdx +; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq entry: %0 = zext i64 %a to i128 diff --git a/test/CodeGen/X86/lwp-intrinsics-x86_64.ll b/test/CodeGen/X86/lwp-intrinsics-x86_64.ll new file mode 100644 index 000000000000..9ee95267fc33 --- /dev/null +++ b/test/CodeGen/X86/lwp-intrinsics-x86_64.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X64 + +define i8 @test_lwpins64_rri(i64 %a0, i32 %a1) nounwind { +; X64-LABEL: test_lwpins64_rri: +; X64: # BB#0: +; X64-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF +; X64-NEXT: setb %al +; X64-NEXT: retq + %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967) + ret i8 %1 +} + +define i8 @test_lwpins64_rmi(i64 %a0, i32 *%p1) nounwind { +; X64-LABEL: test_lwpins64_rmi: +; X64: # BB#0: +; X64-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 +; X64-NEXT: setb %al +; X64-NEXT: retq + %a1 = load i32, i32 *%p1 + %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328) + ret i8 %1 +} + +define void @test_lwpval64_rri(i64 %a0, i32 %a1) nounwind { +; X64-LABEL: test_lwpval64_rri: +; X64: # BB#0: +; X64-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 +; X64-NEXT: retq + tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552) + ret void +} + +define void @test_lwpval64_rmi(i64 %a0, i32 *%p1) nounwind { +; X64-LABEL: test_lwpval64_rmi: +; X64: # BB#0: +; X64-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 +; X64-NEXT: retq + %a1 = load i32, i32 *%p1 + tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 
305419896) + ret void +} + +declare i8 @llvm.x86.lwpins64(i64, i32, i32) nounwind +declare void @llvm.x86.lwpval64(i64, i32, i32) nounwind diff --git a/test/CodeGen/X86/lwp-intrinsics.ll b/test/CodeGen/X86/lwp-intrinsics.ll new file mode 100644 index 000000000000..c949bc806083 --- /dev/null +++ b/test/CodeGen/X86/lwp-intrinsics.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown -mattr=+lwp | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+lwp | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver1 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver3 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=bdver4 | FileCheck %s --check-prefix=X64 + +define void @test_llwpcb(i8 *%a0) nounwind { +; X86-LABEL: test_llwpcb: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: llwpcb %eax +; X86-NEXT: retl +; +; X64-LABEL: test_llwpcb: +; X64: # BB#0: +; X64-NEXT: llwpcb %rdi +; X64-NEXT: retq + tail call void @llvm.x86.llwpcb(i8 *%a0) + ret void +} + +define i8* @test_slwpcb(i8 *%a0) nounwind { +; X86-LABEL: test_slwpcb: +; X86: # BB#0: +; X86-NEXT: slwpcb %eax +; X86-NEXT: retl +; +; X64-LABEL: test_slwpcb: +; X64: # BB#0: +; X64-NEXT: slwpcb %rax +; X64-NEXT: retq + %1 = tail call i8* @llvm.x86.slwpcb() + ret i8 *%1 +} + +define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_lwpins32_rri: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: test_lwpins32_rri: +; X64: # BB#0: +; X64-NEXT: addl %esi, %esi +; X64-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF +; X64-NEXT: setb %al +; X64-NEXT: retq + %1 = add i32 %a1, %a1 + %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967) + ret i8 %2 +} + +define i8 @test_lwpins32_rmi(i32 %a0, i32 *%p1) nounwind { +; X86-LABEL: test_lwpins32_rmi: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lwpins $1985229328, (%eax), %ecx # imm = 0x76543210 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: test_lwpins32_rmi: +; X64: # BB#0: +; X64-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 +; X64-NEXT: setb %al +; X64-NEXT: retq + %a1 = load i32, i32 *%p1 + %1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328) + ret i8 %1 +} + +define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_lwpval32_rri: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 +; X86-NEXT: retl +; +; X64-LABEL: test_lwpval32_rri: +; X64: # BB#0: +; X64-NEXT: addl %esi, %esi +; X64-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 +; X64-NEXT: retq + 
%1 = add i32 %a1, %a1 + tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552) + ret void +} + +define void @test_lwpval32_rmi(i32 %a0, i32 *%p1) nounwind { +; X86-LABEL: test_lwpval32_rmi: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: lwpval $305419896, (%eax), %ecx # imm = 0x12345678 +; X86-NEXT: retl +; +; X64-LABEL: test_lwpval32_rmi: +; X64: # BB#0: +; X64-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 +; X64-NEXT: retq + %a1 = load i32, i32 *%p1 + tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896) + ret void +} + +declare void @llvm.x86.llwpcb(i8*) nounwind +declare i8* @llvm.x86.slwpcb() nounwind +declare i8 @llvm.x86.lwpins32(i32, i32, i32) nounwind +declare void @llvm.x86.lwpval32(i32, i32, i32) nounwind diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 1a15cab97e2e..29a662fb217e 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -54,13 +54,13 @@ define <16 x float> @test1(float* %base, <16 x i32> %ind) { %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } -declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) -declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>) -declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> ) +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) +declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> ) ; SCALAR-LABEL: test2 @@ -111,7 +111,7 @@ define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) { %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind %imask = bitcast i16 %mask to <16 x i1> - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef) ret <16 x float> %res } @@ -152,7 +152,7 @@ define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) { %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind %imask = bitcast i16 %mask to <16 x i1> - %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) + %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) ret <16 x i32> %res } @@ -205,8 +205,8 @@ define <16 x i32> @test4(i32* %base, 
<16 x i32> %ind, i16 %mask) { %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind %imask = bitcast i16 %mask to <16 x i1> - %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) - %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) + %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) + %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) %res = add <16 x i32> %gt1, %gt2 ret <16 x i32> %res } @@ -270,13 +270,13 @@ define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind %imask = bitcast i16 %mask to <16 x i1> - call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) - call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ret void } -declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) -declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> ) +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) +declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> ) ; SCALAR-LABEL: test6 @@ -326,9 +326,9 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) { ; SKX_32-NEXT: vmovdqa %ymm2, %ymm0 ; SKX_32-NEXT: retl - %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) + %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) - call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ret <8 x i32>%a } @@ -384,8 +384,8 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) { %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind %imask = bitcast i8 %mask to <8 x i1> - %gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef) - %gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1) + %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef) + %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1) %res = add <8 x i32> %gt1, %gt2 ret <8 x i32> %res } @@ -444,8 +444,8 @@ define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) { ; SKX_32-NEXT: retl %imask = bitcast i16 %mask to <16 x i1> - %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> 
%imask, <16 x i32>undef) - %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) + %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef) + %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) %res = add <16 x i32> %gt1, %gt2 ret <16 x i32> %res } @@ -522,7 +522,7 @@ entry: %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13> - %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) + %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) ret <8 x i32> %res } @@ -591,7 +591,7 @@ entry: %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13 - %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) + %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) ret <8 x i32> %res } @@ -632,7 +632,7 @@ define <16 x float> @test11(float* %base, i32 %ind) { %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } @@ -671,7 +671,7 @@ define <16 x float> @test12(float* %base, <16 x i32> %ind) { %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } @@ -710,7 +710,7 @@ define <16 x float> @test13(float* %base, <16 x i32> %ind) { %sext_ind = sext <16 x i32> 
%ind to <16 x i64> %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } @@ -772,13 +772,13 @@ define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ret <16 x float>%res } -declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>) -declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>) -declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) ; Gather smaller than existing instruction define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { @@ -831,7 +831,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef) + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef) ret <4 x float>%res } @@ -890,7 +890,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind - %res = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0) + %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0) ret <4 x double>%res } @@ -942,15 +942,15 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind - %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0) + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0) ret <2 x double>%res } -declare void 
@llvm.masked.scatter.v4i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> ) -declare void @llvm.masked.scatter.v4f64(<4 x double> , <4 x double*> , i32 , <4 x i1> ) -declare void @llvm.masked.scatter.v2i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> ) -declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) -declare void @llvm.masked.scatter.v2f32(<2 x float> , <2 x float*> , i32 , <2 x i1> ) +declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> ) +declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> ) +declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> ) +declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> ) define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; @@ -995,7 +995,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 ; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ret void } @@ -1049,7 +1049,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl %gep = getelementptr double, double* %ptr, <4 x i64> %ind - call void @llvm.masked.scatter.v4f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask) + call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask) ret void } @@ -1103,7 +1103,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { ; SKX_32-NEXT: kshiftrb $6, %k0, %k1 ; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) + call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) ret void } @@ -1157,12 +1157,12 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask) + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask) ret void } ; The result type requires widening -declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>) define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) { ; @@ -1222,12 +1222,12 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind - %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) + %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) ret <2 x float>%res } -declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) -declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, 
i32, <2 x i1>, <2 x i64>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { ; @@ -1276,7 +1276,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> % ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind - %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0) + %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0) ret <2 x i32>%res } @@ -1320,7 +1320,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) { ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind - %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) + %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) ret <2 x i32>%res } @@ -1371,7 +1371,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> % ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind - %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0) + %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0) ret <2 x i64>%res } @@ -1418,7 +1418,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) { ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind - %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0) + %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0) ret <2 x i64>%res } @@ -1466,7 +1466,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) { ; SKX_32-NEXT: retl %sext_ind = sext <2 x i32> %ind to <2 x i64> %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind - %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef) + %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef) ret <2 x float>%res } @@ -1515,7 +1515,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) { ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>) + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>) ret void } @@ -1568,23 +1568,23 @@ define <16 x float> @test29(float* %base, <16 x i32> %ind) { %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 
true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef) ret <16 x float>%res } ; Check non-power-of-2 case. It should be scalarized. -declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>) +declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>) define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; ALL-LABEL: test30: ; ALL-NOT: gather %sext_ind = sext <3 x i32> %ind to <3 x i64> %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind - %res = call <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0) + %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0) ret <3 x i32>%res } -declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>) +declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>) ; KNL-LABEL: test31 ; KNL: vpgatherqq @@ -1626,7 +1626,7 @@ define <16 x float*> @test31(<16 x float**> %ptrs) { ; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0 ; SKX_32-NEXT: retl - %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef) + %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef) ret <16 x float*>%res } @@ -1672,7 +1672,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} ; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 ; SKX_32-NEXT: retl - %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0) + %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0) ret <16 x i32> %res } define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { @@ -1749,10 +1749,10 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: retl - %res = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) + %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0) ret <16 x i64> %res } -declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) +declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { ; KNL_64-LABEL: test_gather_16f32: ; KNL_64: # BB#0: @@ -1795,7 +1795,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> 
%ptrs, <16 x i1> %mask, <16 ; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} ; SKX_32-NEXT: vmovaps %zmm2, %zmm0 ; SKX_32-NEXT: retl - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0) ret <16 x float> %res } define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { @@ -1872,10 +1872,10 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: retl - %res = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) + %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) ret <16 x double> %res } -declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) +declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { ; KNL_64-LABEL: test_scatter_16i32: ; KNL_64: # BB#0: @@ -1918,7 +1918,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask) + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask) ret void } define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { @@ -1993,10 +1993,10 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) + call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) ret void } -declare void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) +declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { ; KNL_64-LABEL: test_scatter_16f32: ; KNL_64: # BB#0: @@ -2039,10 +2039,10 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask) + call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask) ret void } -declare void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask) +declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask) define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { ; KNL_64-LABEL: test_scatter_16f64: ; KNL_64: # BB#0: @@ -2115,10 +2115,10 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: 
vzeroupper ; SKX_32-NEXT: retl - call void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) + call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) ret void } -declare void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) +declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) { ; KNL_64-LABEL: test_pr28312: @@ -2193,11 +2193,11 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6 ; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: popl %ebp ; SKX_32-NEXT: retl - %g1 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) - %g2 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) - %g3 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) + %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) + %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) + %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) %a = add <4 x i64> %g1, %g2 %b = add <4 x i64> %a, %g3 ret <4 x i64> %b } -declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) diff --git a/test/CodeGen/X86/stack-folding-lwp.ll b/test/CodeGen/X86/stack-folding-lwp.ll new file mode 100644 index 000000000000..edf2798ff846 --- /dev/null +++ b/test/CodeGen/X86/stack-folding-lwp.ll @@ -0,0 +1,49 @@ +; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+lwp < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +; Stack reload folding tests. +; +; By including a nop call with sideeffects we can force a partial register spill of the +; relevant registers and check that the reload is correctly folded into the instruction. 
+ +define i8 @stack_fold_lwpins_u32(i32 %a0, i32 %a1) { +; CHECK-LABEL: stack_fold_lwpins_u32 +; CHECK: # BB#0: +; CHECK: lwpins $2814, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 2814) + ret i8 %2 +} +declare i8 @llvm.x86.lwpins32(i32, i32, i32) + +define i8 @stack_fold_lwpins_u64(i64 %a0, i32 %a1) { +; CHECK-LABEL: stack_fold_lwpins_u64 +; CHECK: # BB#0: +; CHECK: lwpins $2814, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + %2 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2814) + ret i8 %2 +} +declare i8 @llvm.x86.lwpins64(i64, i32, i32) + +define void @stack_fold_lwpval_u32(i32 %a0, i32 %a1) { +; CHECK-LABEL: stack_fold_lwpval_u32 +; CHECK: # BB#0: +; CHECK: lwpval $2814, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 2814) + ret void +} +declare void @llvm.x86.lwpval32(i32, i32, i32) + +define void @stack_fold_lwpval_u64(i64 %a0, i32 %a1) { +; CHECK-LABEL: stack_fold_lwpval_u64 +; CHECK: # BB#0: +; CHECK: lwpval $2814, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload + %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 2814) + ret void +} +declare void @llvm.x86.lwpval64(i64, i32, i32) diff --git a/test/CodeGen/X86/version_directive.ll b/test/CodeGen/X86/version_directive.ll index 8e4e6dc70e61..ac5eda71dbc6 100644 --- a/test/CodeGen/X86/version_directive.ll +++ b/test/CodeGen/X86/version_directive.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple x86_64-apple-darwin15.0.0 -o - /dev/null | FileCheck %s ; RUN: llc -mtriple x86_64-apple-macosx10.11.0 -o - /dev/null | FileCheck %s +; RUN: llc -mtriple x86_64-apple-macos10.11.0 -o - /dev/null | FileCheck %s ; CHECK: .macosx_version_min 10, 11 diff --git a/test/CodeGen/X86/x86-32-intrcc.ll b/test/CodeGen/X86/x86-32-intrcc.ll index 9794f2cb3e46..ac0e7e11e0e8 100644 --- a/test/CodeGen/X86/x86-32-intrcc.ll +++ b/test/CodeGen/X86/x86-32-intrcc.ll @@ -57,23 +57,23 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %eco define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) { call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushl %ebp - ; CHECK-SSE-NEXT: pushl %ebx - ; CHECK-SSE-NEXT; pushl %eax - ; CHECK-SSE-NEXT: popl %eax - ; CHECK-SSE-NEXT: popl %ebx - ; CHECK-SSE-NEXT: popl %ebp - ; CHECK-SSE-NEXT: addl $4, %esp - ; CHECK-SSE-NEXT: iretl + ; CHECK: pushl %ebp + ; CHECK: pushl %ebx + ; CHECK: pushl %eax + ; CHECK: popl %eax + ; CHECK: popl %ebx + ; CHECK: popl %ebp + ; CHECK: addl $4, %esp + ; CHECK: iretl ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushl %ebp - ; CHECK0-SSE-NEXT: pushl %ebx - ; CHECK0-SSE-NEXT; pushl %eax - ; CHECK0-SSE-NEXT: popl %eax - ; CHECK0-SSE-NEXT: popl %ebx - ; CHECK0-SSE-NEXT: popl %ebp - ; CHECK0-SSE-NEXT: addl $4, %esp - ; 
CHECK0-SSE-NEXT: iretl + ; CHECK0: pushl %ebp + ; CHECK0: pushl %ebx + ; CHECK0: pushl %eax + ; CHECK0: popl %eax + ; CHECK0: popl %ebx + ; CHECK0: popl %ebp + ; CHECK0: addl $4, %esp + ; CHECK0: iretl ret void } diff --git a/test/CodeGen/X86/x86-64-intrcc.ll b/test/CodeGen/X86/x86-64-intrcc.ll index c8bc9e716ce5..75ca1af79b31 100644 --- a/test/CodeGen/X86/x86-64-intrcc.ll +++ b/test/CodeGen/X86/x86-64-intrcc.ll @@ -59,32 +59,33 @@ define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %eco define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) { call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"() ; CHECK-LABEL: test_isr_clobbers - ; CHECK-SSE-NEXT: pushq %rax - ; CHECK-SSE-NEXT: pushq %rax - ; CHECK-SSE-NEXT; pushq %r11 - ; CHECK-SSE-NEXT: pushq %rbp - ; CHECK-SSE-NEXT: pushq %rbx - ; CHECK-SSE-NEXT: movaps %xmm0 - ; CHECK-SSE-NEXT: movaps %xmm0 - ; CHECK-SSE-NEXT: popq %rbx - ; CHECK-SSE-NEXT: popq %rbp - ; CHECK-SSE-NEXT: popq %r11 - ; CHECK-SSE-NEXT: popq %rax - ; CHECK-SSE-NEXT: addq $8, %rsp - ; CHECK-SSE-NEXT: iretq + + ; CHECK: pushq %rax + ; CHECK: pushq %rbp + ; CHECK: pushq %r11 + ; CHECK: pushq %rbx + ; CHECK: movaps %xmm0 + ; CHECK: movaps {{.*}}, %xmm0 + ; CHECK: popq %rbx + ; CHECK: popq %r11 + ; CHECK: popq %rbp + ; CHECK: popq %rax + ; CHECK: addq $16, %rsp + ; CHECK: iretq ; CHECK0-LABEL: test_isr_clobbers - ; CHECK0-SSE-NEXT: pushq %rax - ; CHECK0-SSE-NEXT; pushq %r11 - ; CHECK0-SSE-NEXT: pushq %rbp - ; CHECK0-SSE-NEXT: pushq %rbx - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: movaps %xmm0 - ; CHECK0-SSE-NEXT: popq %rbx - ; CHECK0-SSE-NEXT: popq %rbp - ; CHECK0-SSE-NEXT: popq %r11 - ; CHECK0-SSE-NEXT: popq %rax - ; CHECK0-SSE-NEXT: addq $16, %rsp - ; CHECK0-SSE-NEXT: iretq + + ; CHECK0: pushq %rax + ; CHECK0: pushq %rbp + ; CHECK0: pushq %r11 + ; CHECK0: pushq %rbx + ; CHECK0: movaps %xmm0 + ; CHECK0: movaps {{.*}}, %xmm0 + ; CHECK0: popq %rbx + ; CHECK0: popq %r11 + ; CHECK0: popq %rbp + ; CHECK0: popq %rax + ; CHECK0: addq $16, %rsp + ; CHECK0: iretq ret void } diff --git a/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll b/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll new file mode 100644 index 000000000000..7e370c25e31b --- /dev/null +++ b/test/CodeGen/X86/x86-no_caller_saved_registers-preserve.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py for function "bar" +; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s + +;; In functions with 'no_caller_saved_registers' attribute, all registers should
+;; be preserved except for registers used for passing/returning arguments.
+;; In the following function, registers %RDI, %RSI and %XMM0 are used to store
+;; arguments %a0, %a1 and %b0, respectively. The value is returned in %RAX.
+;; The above registers should not be preserved; however, other registers
+;; (that are modified by the function) should be preserved (%RDX and %XMM1).
+define x86_64_sysvcc i32 @bar(i32 %a0, i32 %a1, float %b0) #0 {
+; CHECK-LABEL: bar:
+; CHECK: # BB#0:
+; CHECK-NEXT: pushq %rdx
+; CHECK-NEXT: .Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: .Lcfi1:
+; CHECK-NEXT: .cfi_offset %rdx, -16
+; CHECK-NEXT: .Lcfi2:
+; CHECK-NEXT: .cfi_offset %xmm1, -32
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movl $4, %eax
+; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: retq
+ call void asm sideeffect "", "~{rax},~{rdx},~{xmm1},~{rdi},~{rsi},~{xmm0}"()
+ ret i32 4
+}
+
+;; Because "bar" has the 'no_caller_saved_registers' attribute, function "foo"
+;; doesn't need to preserve registers except for the arguments passed
+;; to "bar" (%ESI, %EDI and %XMM0). +define x86_64_sysvcc float @foo(i32 %a0, i32 %a1, float %b0) { +; CHECK-LABEL: foo
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: movl %edi, %edx
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: addl %edx, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2ssl %eax, %xmm0
+; CHECK-NEXT: addss %xmm0, %xmm1
+; CHECK: retq
+ %call = call i32 @bar(i32 %a0, i32 %a1, float %b0) #0
+ %c0 = add i32 %a0, %call
+ %c1 = add i32 %c0, %a1
+ %c2 = sitofp i32 %c1 to float
+ %c3 = fadd float %c2, %b0
+ ret float %c3
+}
+
+attributes #0 = { "no_caller_saved_registers" }
diff --git a/test/CodeGen/X86/x86-no_caller_saved_registers.ll b/test/CodeGen/X86/x86-no_caller_saved_registers.ll
new file mode 100644
index 000000000000..9c62e3ee6ba7
--- /dev/null
+++ b/test/CodeGen/X86/x86-no_caller_saved_registers.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse2 -O0 < %s | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; In functions with the 'no_caller_saved_registers' attribute, all registers should
+;; be preserved except for registers used for passing/returning arguments.
+;; The test checks that function "bar" preserves the xmm0 register.
+;; It also checks that the caller function "foo" does not save registers for the
+;; callee "bar"; for example, there is no store/load/access to xmm registers.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define i32 @bar(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) #0 {
+; CHECK-LABEL: bar
+; CHECK: mov{{.*}} %xmm0
+; CHECK: mov{{.*}} {{.*}}, %xmm0
+; CHECK: ret
+ call void asm sideeffect "", "~{xmm0}"()
+ ret i32 1
+}
+
+define x86_intrcc void @foo(i8* nocapture readnone %c) {
+; CHECK-LABEL: foo
+; CHECK-NOT: xmm
+entry:
+ tail call i32 @bar(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8) #0
+ ret void
+}
+
+attributes #0 = { "no_caller_saved_registers" }
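
The two new tests above exercise the "no_caller_saved_registers" function attribute: a callee carrying the attribute must preserve every register it modifies except the ones used to pass or return its own arguments, so a caller is free to keep unrelated values live in caller-saved registers across the call. The reduced LLVM IR sketch below illustrates the idea; it is not part of this commit, and the function names and constants are invented for illustration.

; Minimal sketch (hypothetical, not from the patch): from the caller's point of
; view, @ncsr_callee only clobbers the registers used for its own argument and
; return value (xmm0 here), so @caller can keep %b live in another xmm register
; across the call without spilling it.
define float @ncsr_callee(float %x) #0 {
  %r = fadd float %x, 1.0
  ret float %r
}

define float @caller(float %a, float %b) {
  %c = call float @ncsr_callee(float %a)
  %d = fadd float %c, %b          ; %b is expected to survive the call in a register
  ret float %d
}

attributes #0 = { "no_caller_saved_registers" }

Compiled with something like llc -mtriple=x86_64-unknown-unknown, @caller should be able to add %b back in without reloading it from the stack, which is essentially what the CHECK lines in the two tests above verify.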
diff --git a/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 Binary files differindex 447813419e3e..21c1eacd0714 100644 --- a/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 +++ b/test/DebugInfo/Inputs/dwarfdump-header.elf-x86-64 diff --git a/test/DebugInfo/Inputs/dwarfdump-header.s b/test/DebugInfo/Inputs/dwarfdump-header.s index ce51e987f38a..c5cf48597765 100644 --- a/test/DebugInfo/Inputs/dwarfdump-header.s +++ b/test/DebugInfo/Inputs/dwarfdump-header.s @@ -1,5 +1,6 @@ -# Test object to verify dwarfdump handles v4 and v5 CU/TU headers. +# Test object to verify dwarfdump handles v4 and v5 CU/TU/line headers. # We have a representative set of units: v4 CU, v5 CU, v4 TU, v5 split TU. +# We have v4 and v5 line-table headers. # # To generate the test object: # llvm-mc -triple x86_64-unknown-linux dwarfdump-header.s -filetype=obj \ @@ -28,6 +29,8 @@ dwo_TU_5: .byte 0x0e # DW_FORM_strp .byte 0x03 # DW_AT_name .byte 0x0e # DW_FORM_strp + .byte 0x10 # DW_AT_stmt_list + .byte 0x17 # DW_FORM_sec_offset .byte 0x00 # EOM(1) .byte 0x00 # EOM(2) .byte 0x02 # Abbrev code @@ -81,10 +84,11 @@ CU_4_version: .short 4 # DWARF version number .long .debug_abbrev # Offset Into Abbrev. Section .byte 8 # Address Size (in bytes) -# The compile-unit DIE, which has just DW_AT_producer and DW_AT_name. +# The compile-unit DIE, with DW_AT_producer, DW_AT_name, DW_AT_stmt_list. .byte 1 .long str_producer .long str_CU_4 + .long LH_4_start .byte 0 # NULL CU_4_end: @@ -95,10 +99,11 @@ CU_5_version: .byte 1 # DWARF Unit Type .byte 8 # Address Size (in bytes) .long .debug_abbrev # Offset Into Abbrev. Section -# The compile-unit DIE, which has just DW_AT_producer and DW_AT_name. +# The compile-unit DIE, with DW_AT_producer, DW_AT_name, DW_AT_stmt_list. .byte 1 .long str_producer .long str_CU_5 + .long LH_5_start .byte 0 # NULL CU_5_end: @@ -147,3 +152,106 @@ TU_split_5_type: .byte 0 # NULL .byte 0 # NULL TU_split_5_end: + + .section .debug_line,"",@progbits +# DWARF v4 line-table header. +LH_4_start: + .long LH_4_end-LH_4_version # Length of Unit +LH_4_version: + .short 4 # DWARF version number + .long LH_4_header_end-LH_4_params # Length of Prologue +LH_4_params: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte -5 # Line Base + .byte 14 # Line Range + .byte 13 # Opcode Base + .byte 0 # Standard Opcode Lengths + .byte 1 + .byte 1 + .byte 1 + .byte 1 + .byte 0 + .byte 0 + .byte 0 + .byte 1 + .byte 0 + .byte 0 + .byte 1 + # Directory table + .asciz "Directory4a" + .asciz "Directory4b" + .byte 0 + # File table + .asciz "File4a" # File name 1 + .byte 1 # Directory index 1 + .byte 0x41 # Timestamp 1 + .byte 0x42 # File Size 1 + .asciz "File4b" # File name 2 + .byte 0 # Directory index 2 + .byte 0x43 # Timestamp 2 + .byte 0x44 # File Size 2 + .byte 0 # End of list +LH_4_header_end: + # Line number program, which is empty. +LH_4_end: + +# DWARF v5 line-table header. 
+LH_5_start: + .long LH_5_end-LH_5_version # Length of Unit +LH_5_version: + .short 5 # DWARF version number + .byte 8 # Address Size + .byte 0 # Segment Selector Size + .long LH_5_header_end-LH_5_params # Length of Prologue +LH_5_params: + .byte 1 # Minimum Instruction Length + .byte 1 # Maximum Operations per Instruction + .byte 1 # Default is_stmt + .byte -5 # Line Base + .byte 14 # Line Range + .byte 13 # Opcode Base + .byte 0 # Standard Opcode Lengths + .byte 1 + .byte 1 + .byte 1 + .byte 1 + .byte 0 + .byte 0 + .byte 0 + .byte 1 + .byte 0 + .byte 0 + .byte 1 + # Directory table format + .byte 1 # One element per directory entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + # Directory table entries + .byte 2 # Two directories + .asciz "Directory5a" + .asciz "Directory5b" + # File table format + .byte 4 # Four elements per file entry + .byte 1 # DW_LNCT_path + .byte 0x08 # DW_FORM_string + .byte 2 # DW_LNCT_directory_index + .byte 0x0b # DW_FORM_data1 + .byte 3 # DW_LNCT_timestamp + .byte 0x0f # DW_FORM_udata + .byte 4 # DW_LNCT_size + .byte 0x0f # DW_FORM_udata + # File table entries + .byte 2 # Two files + .asciz "File5a" + .byte 1 + .byte 0x51 + .byte 0x52 + .asciz "File5b" + .byte 2 + .byte 0x53 + .byte 0x54 +LH_5_header_end: + # Line number program, which is empty. +LH_5_end: diff --git a/test/DebugInfo/dwarfdump-header.test b/test/DebugInfo/dwarfdump-header.test index 3947c8b438d2..222e506dac37 100644 --- a/test/DebugInfo/dwarfdump-header.test +++ b/test/DebugInfo/dwarfdump-header.test @@ -7,13 +7,13 @@ CHECK-LABEL: .debug_info contents: The v4 CU header. -CHECK: 0x00000000: Compile Unit: length = 0x00000011 version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000015) +CHECK: 0x00000000: Compile Unit: length = 0x00000015 version = 0x0004 abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000019) CHECK: 0x0000000b: DW_TAG_compile_unit The v5 normal CU header. -CHECK: 0x00000015: Compile Unit: length = 0x00000012 version = 0x0005 unit_type = DW_UT_compile abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x0000002b) -CHECK: 0x00000021: DW_TAG_compile_unit +CHECK: 0x00000019: Compile Unit: length = 0x00000016 version = 0x0005 unit_type = DW_UT_compile abbr_offset = 0x0000 addr_size = 0x08 (next unit at 0x00000033) +CHECK: 0x00000025: DW_TAG_compile_unit CHECK-LABEL: .debug_types contents: @@ -27,3 +27,33 @@ CHECK: .debug_types.dwo contents: CHECK: 0x00000000: Type Unit: length = 0x00000020 version = 0x0005 unit_type = DW_UT_split_type abbr_offset = 0x0000 addr_size = 0x08 name = 'V5_split_type_unit' type_signature = 0x8899aabbccddeeff type_offset = 0x001d (next unit at 0x00000024) CHECK: 0x00000018: DW_TAG_type_unit + +CHECK-LABEL: .debug_line contents: + +The v4 line table header. + +CHECK: Line table prologue: +CHECK: version: 4 +CHECK-NOT: address_size +CHECK-NOT: seg_select_size +CHECK: max_ops_per_inst: 1 +CHECK: include_directories[ 1] = 'Directory4a' +CHECK: include_directories[ 2] = 'Directory4b' +CHECK-NOT: include_directories +CHECK: file_names[ 1] 1 0x00000041 0x00000042 File4a{{$}} +CHECK: file_names[ 2] 0 0x00000043 0x00000044 File4b{{$}} +CHECK-NOT: file_names + +The v5 line table header. 
+ +CHECK: Line table prologue: +CHECK: version: 5 +CHECK: address_size: 8 +CHECK: seg_select_size: 0 +CHECK: max_ops_per_inst: 1 +CHECK: include_directories[ 1] = 'Directory5a' +CHECK: include_directories[ 2] = 'Directory5b' +CHECK-NOT: include_directories +CHECK: file_names[ 1] 1 0x00000051 0x00000052 File5a{{$}} +CHECK: file_names[ 2] 2 0x00000053 0x00000054 File5b{{$}} +CHECK-NOT: file_names diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll index 278cb9564e62..bbf30d3cc231 100644 --- a/test/Feature/intrinsics.ll +++ b/test/Feature/intrinsics.ll @@ -69,5 +69,5 @@ define void @trap() { ret void } -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } ; CHECK: attributes #1 = { noreturn nounwind } diff --git a/test/MC/AArch64/arm32-large-relocs.s b/test/MC/AArch64/arm32-large-relocs.s deleted file mode 100644 index 1ac86c0871a6..000000000000 --- a/test/MC/AArch64/arm32-large-relocs.s +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding -o - \ -// RUN: %s \ -// RUN: | FileCheck %s -// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-linux-gnu -show-encoding \ -// RUN: -filetype=obj -o - %s \ -// RUN: | llvm-objdump -r - \ -// RUN: | FileCheck --check-prefix=CHECK-OBJ %s - - movz x2, #:abs_g0:sym - movk w3, #:abs_g0_nc:sym - movz x13, #:abs_g0_s:sym - movn x17, #:abs_g0_s:sym -// CHECK: movz x2, #:abs_g0:sym // encoding: [0bAAA00010,A,0b100AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0:sym, kind: fixup_aarch64_movw -// CHECK: movk w3, #:abs_g0_nc:sym // encoding: [0bAAA00011,A,0b100AAAAA,0x72] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_nc:sym, kind: fixup_aarch64_movw -// CHECK: movz x13, #:abs_g0_s:sym // encoding: [0bAAA01101,A,0b100AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw -// CHECK: movn x17, #:abs_g0_s:sym // encoding: [0bAAA10001,A,0b100AAAAA,0x92] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g0_s:sym, kind: fixup_aarch64_movw - -// CHECK-OBJ: 0 R_AARCH64_P32_MOVW_UABS_G0 sym -// CHECK-OBJ: 4 R_AARCH64_P32_MOVW_UABS_G0_NC sym -// CHECK-OBJ: 8 R_AARCH64_P32_MOVW_SABS_G0 sym -// CHECK-OBJ: c R_AARCH64_P32_MOVW_SABS_G0 sym - - movz x4, #:abs_g1:sym -// CHECK: movz x4, #:abs_g1:sym // encoding: [0bAAA00100,A,0b101AAAAA,0xd2] -// CHECK-NEXT: // fixup A - offset: 0, value: :abs_g1:sym, kind: fixup_aarch64_movw - -// CHECK-OBJ: 10 R_AARCH64_P32_MOVW_UABS_G1 sym diff --git a/test/MC/AArch64/arm32-tls-relocs.s b/test/MC/AArch64/arm32-tls-relocs.s deleted file mode 100644 index 390da05529fa..000000000000 --- a/test/MC/AArch64/arm32-tls-relocs.s +++ /dev/null @@ -1,290 +0,0 @@ -// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \ -// RUN: -show-encoding < %s | FileCheck --check-prefix=CHECK-ILP32 %s -// RUN: llvm-mc -target-abi=ilp32 -triple=arm64-none-linux-gnu \ -// RUN: -filetype=obj < %s -o - | \ -// RUN: llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF-ILP32 %s - -//////////////////////////////////////////////////////////////////////////////// -// TLS initial-exec forms -//////////////////////////////////////////////////////////////////////////////// - - adrp x11, :gottprel:var - ldr w10, [x0, #:gottprel_lo12:var] - ldr w9, :gottprel:var -// CHECK-ILP32: adrp x11, :gottprel:var // encoding: [0x0b'A',A,A,0x90'A'] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_pcrel_adrp_imm21 -// CHECK-ILP32: ldr w10, [x0, 
:gottprel_lo12:var] // encoding: [0x0a,0bAAAAAA00,0b01AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 -// CHECK-ILP32: ldr w9, :gottprel:var // encoding: [0bAAA01001,A,A,0x18] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :gottprel:var, kind: fixup_aarch64_ldr_pcrel_imm19 - -// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM:[^ ]+]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]] - - -//////////////////////////////////////////////////////////////////////////////// -// TLS local-exec forms -//////////////////////////////////////////////////////////////////////////////// - - movz x5, #:tprel_g1:var - movn x6, #:tprel_g1:var - movz w7, #:tprel_g1:var -// CHECK-ILP32: movz x5, #:tprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movn x6, #:tprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movz w7, #:tprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]] -// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]] -// CHECK-ELF-ILP32: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G1 [[VARSYM]] - - - movz x11, #:tprel_g0:var - movn x12, #:tprel_g0:var - movz w13, #:tprel_g0:var -// CHECK-ILP32: movz x11, #:tprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movn x12, #:tprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movz w13, #:tprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0 [[VARSYM]] - - - movk w15, #:tprel_g0_nc:var - movk w16, #:tprel_g0_nc:var -// CHECK-ILP32: movk w15, #:tprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0x72] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movk w16, #:tprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]] - - - add x21, x22, #:tprel_lo12:var -// CHECK-ILP32: add x21, x22, :tprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_add_imm12 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12 [[VARSYM]] - - - add x25, x26, 
#:tprel_lo12_nc:var -// CHECK-ILP32: add x25, x26, :tprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]] - - - ldrb w29, [x30, #:tprel_lo12:var] - ldrsb x29, [x28, #:tprel_lo12_nc:var] -// CHECK-ILP32: ldrb w29, [x30, :tprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 -// CHECK-ILP32: ldrsb x29, [x28, :tprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]] - - - strh w27, [x26, #:tprel_lo12:var] - ldrsh x25, [x24, #:tprel_lo12_nc:var] -// CHECK-ILP32: strh w27, [x26, :tprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 -// CHECK-ILP32: ldrsh x25, [x24, :tprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]] - - - ldr w23, [x22, #:tprel_lo12:var] - ldrsw x21, [x20, #:tprel_lo12_nc:var] -// CHECK-ILP32: ldr w23, [x22, :tprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 -// CHECK-ILP32: ldrsw x21, [x20, :tprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]] - - ldr x19, [x18, #:tprel_lo12:var] - str x17, [x16, #:tprel_lo12_nc:var] -// CHECK-ILP32: ldr x19, [x18, :tprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 -// CHECK-ILP32: str x17, [x16, :tprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]] - - - ldr q24, [x23, :tprel_lo12:var] - str q22, [x21, :tprel_lo12_nc:var] -// CHECK-ILP32: ldr q24, [x23, :tprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16 -// CHECK-ILP32: str q22, [x21, :tprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16 - -// 
CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC [[VARSYM]] - -//////////////////////////////////////////////////////////////////////////////// -// TLS local-dynamic forms -//////////////////////////////////////////////////////////////////////////////// - - movz x5, #:dtprel_g1:var - movn x6, #:dtprel_g1:var - movz w7, #:dtprel_g1:var -// CHECK-ILP32: movz x5, #:dtprel_g1:var // encoding: [0bAAA00101,A,0b101AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movn x6, #:dtprel_g1:var // encoding: [0bAAA00110,A,0b101AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movz w7, #:dtprel_g1:var // encoding: [0bAAA00111,A,0b101AAAAA,0x12] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1 [[VARSYM]] - - - movz x11, #:dtprel_g0:var - movn x12, #:dtprel_g0:var - movz w13, #:dtprel_g0:var -// CHECK-ILP32: movz x11, #:dtprel_g0:var // encoding: [0bAAA01011,A,0b100AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movn x12, #:dtprel_g0:var // encoding: [0bAAA01100,A,0b100AAAAA,0x92] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movz w13, #:dtprel_g0:var // encoding: [0bAAA01101,A,0b100AAAAA,0x12] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0 [[VARSYM]] - - - movk x15, #:dtprel_g0_nc:var - movk w16, #:dtprel_g0_nc:var -// CHECK-ILP32: movk x15, #:dtprel_g0_nc:var // encoding: [0bAAA01111,A,0b100AAAAA,0xf2] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw -// CHECK-ILP32: movk w16, #:dtprel_g0_nc:var // encoding: [0bAAA10000,A,0b100AAAAA,0x72] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_aarch64_movw - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]] - - - add x21, x22, #:dtprel_lo12:var -// CHECK-ILP32: add x21, x22, :dtprel_lo12:var // encoding: [0xd5,0bAAAAAA10,0b00AAAAAA,0x91] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_add_imm12 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12 [[VARSYM]] - - - add x25, x26, #:dtprel_lo12_nc:var -// CHECK-ILP32: add x25, x26, :dtprel_lo12_nc:var // encoding: [0x59,0bAAAAAA11,0b00AAAAAA,0x91] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_add_imm12 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]] - - - add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 - add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 - -// 
CHECK-ELF-ILP32: R_AARCH64_P32_TLSLD_ADD_DTPREL_HI12 var_tlsld -// CHECK-ELF-ILP32: R_AARCH64_P32_TLSLE_ADD_TPREL_HI12 var_tlsle - - - ldrb w29, [x30, #:dtprel_lo12:var] - ldrsb x29, [x28, #:dtprel_lo12_nc:var] -// CHECK-ILP32: ldrb w29, [x30, :dtprel_lo12:var] // encoding: [0xdd,0bAAAAAA11,0b01AAAAAA,0x39] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale1 -// CHECK-ILP32: ldrsb x29, [x28, :dtprel_lo12_nc:var] // encoding: [0x9d,0bAAAAAA11,0b10AAAAAA,0x39] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale1 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]] - - - strh w27, [x26, #:dtprel_lo12:var] - ldrsh x25, [x24, #:dtprel_lo12_nc:var] -// CHECK-ILP32: strh w27, [x26, :dtprel_lo12:var] // encoding: [0x5b,0bAAAAAA11,0b00AAAAAA,0x79] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale2 -// CHECK-ILP32: ldrsh x25, [x24, :dtprel_lo12_nc:var] // encoding: [0x19,0bAAAAAA11,0b10AAAAAA,0x79] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale2 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]] - - - ldr w23, [x22, #:dtprel_lo12:var] - ldrsw x21, [x20, #:dtprel_lo12_nc:var] -// CHECK-ILP32: ldr w23, [x22, :dtprel_lo12:var] // encoding: [0xd7,0bAAAAAA10,0b01AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 -// CHECK-ILP32: ldrsw x21, [x20, :dtprel_lo12_nc:var] // encoding: [0x95,0bAAAAAA10,0b10AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale4 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]] - - ldr x19, [x18, #:dtprel_lo12:var] - str x17, [x16, #:dtprel_lo12_nc:var] -// CHECK-ILP32: ldr x19, [x18, :dtprel_lo12:var] // encoding: [0x53,0bAAAAAA10,0b01AAAAAA,0xf9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale8 -// CHECK-ILP32: str x17, [x16, :dtprel_lo12_nc:var] // encoding: [0x11,0bAAAAAA10,0b00AAAAAA,0xf9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale8 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]] - - ldr q24, [x23, #:dtprel_lo12:var] - str q22, [x21, #:dtprel_lo12_nc:var] -// CHECK-ILP32: ldr q24, [x23, :dtprel_lo12:var] // encoding: [0xf8,0bAAAAAA10,0b11AAAAAA,0x3d] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_aarch64_ldst_imm12_scale16 -// CHECK-ILP32: str q22, [x21, :dtprel_lo12_nc:var] // encoding: [0xb6,0bAAAAAA10,0b10AAAAAA,0x3d] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_aarch64_ldst_imm12_scale16 - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} 
R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC [[VARSYM]] - -//////////////////////////////////////////////////////////////////////////////// -// TLS descriptor forms -//////////////////////////////////////////////////////////////////////////////// - - adrp x8, :tlsdesc:var - ldr w7, [x6, #:tlsdesc_lo12:var] - add x5, x4, #:tlsdesc_lo12:var - .tlsdesccall var - blr x3 - -// CHECK-ILP32: adrp x8, :tlsdesc:var // encoding: [0x08'A',A,A,0x90'A'] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_aarch64_pcrel_adrp_imm21 -// CHECK-ILP32: ldr w7, [x6, :tlsdesc_lo12:var] // encoding: [0xc7,0bAAAAAA00,0b01AAAAAA,0xb9] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_ldst_imm12_scale4 -// CHECK-ILP32: add x5, x4, :tlsdesc_lo12:var // encoding: [0x85,0bAAAAAA00,0b00AAAAAA,0x91] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_aarch64_add_imm12 -// CHECK-ILP32: .tlsdesccall var // encoding: [] -// CHECK-ILP32-NEXT: // fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call -// CHECK-ILP32: blr x3 // encoding: [0x60,0x00,0x3f,0xd6] - - -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADR_PAGE21 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_LD32_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_ADD_LO12 [[VARSYM]] -// CHECK-ELF-ILP32-NEXT: {{0x[0-9A-F]+}} R_AARCH64_P32_TLSDESC_CALL [[VARSYM]] - - // Make sure symbol 5 has type STT_TLS: - -// CHECK-ELF-ILP32: Symbols [ -// CHECK-ELF-ILP32: Symbol { -// CHECK-ELF-ILP32: Name: var -// CHECK-ELF-ILP32-NEXT: Value: -// CHECK-ELF-ILP32-NEXT: Size: -// CHECK-ELF-ILP32-NEXT: Binding: Global -// CHECK-ELF-ILP32-NEXT: Type: TLS diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s b/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s deleted file mode 100644 index c08192e7e0d6..000000000000 --- a/test/MC/AArch64/elf-reloc-pcreladdressing-ilp32.s +++ /dev/null @@ -1,17 +0,0 @@ -// RUN: llvm-mc -target-abi=ilp32 -triple=aarch64-none-linux-gnu \ -// RUN: -filetype=obj %s -o - | \ -// RUN: llvm-readobj -r | FileCheck -check-prefix=OBJ-ILP32 %s - adr x2, some_label - adrp x5, some_label - - adrp x5, :got:some_label - ldr w0, [x5, #:got_lo12:some_label] - -// OBJ-ILP32: Relocations [ -// OBJ-ILP32-NEXT: Section {{.*}} .rela.text { -// OBJ-ILP32-NEXT: 0x0 R_AARCH64_P32_ADR_PREL_LO21 some_label 0x0 -// OBJ-ILP32-NEXT: 0x4 R_AARCH64_P32_ADR_PREL_PG_HI21 some_label 0x0 -// OBJ-ILP32-NEXT: 0x8 R_AARCH64_P32_ADR_GOT_PAGE some_label 0x0 -// OBJ-ILP32-NEXT: 0xC R_AARCH64_P32_LD32_GOT_LO12_NC some_label 0x0 -// OBJ-ILP32-NEXT: } -// OBJ-ILP32-NEXT: ] diff --git a/test/MC/AArch64/lp64-diagnostics.s b/test/MC/AArch64/lp64-diagnostics.s deleted file mode 100644 index 942923ffccb1..000000000000 --- a/test/MC/AArch64/lp64-diagnostics.s +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu < %s 2> %t2 -filetype=obj \ -// RUN: >/dev/null -// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t2 - - ldr w24, [x23, :tlsdesc_lo12:sym] - ldr s22, [x21, :tlsdesc_lo12:sym] - -// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12) -// CHECK-ERROR: ldr w24, [x23, :tlsdesc_lo12:sym] -// CHECK-ERROR: ^ -// CHECK-ERROR: error: LP64 4 byte TLSDESC load/store relocation not supported (ILP32 eqv: TLSDESC_LD64_LO12) -// CHECK-ERROR: ldr s22, [x21, :tlsdesc_lo12:sym] -// CHECK-ERROR: ^ diff --git 
a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 9dd49e51d91b..1b865d37bf0b 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -773,3 +773,21 @@ #CHECK: getsec 0x0f 0x37 + +#CHECK: llwpcb %ecx +0x8f 0xe9 0x78 0x12 0xc1 + +#CHECK: slwpcb %ecx +0x8f 0xe9 0x78 0x12 0xc9 + +# CHECK: lwpins $305419896, %ebx, %eax +0x8f 0xea 0x78 0x12 0xc3 0x78 0x56 0x34 0x12 + +# CHECK: lwpins $591751049, (%esp), %edx +0x8f 0xea 0x68 0x12 0x04 0x24 0x89 0x67 0x45 0x23 + +# CHECK: lwpval $1737075661, %ebx, %eax +0x8f 0xea 0x78 0x12 0xcb 0xcd 0xab 0x89 0x67 + +# CHECK: lwpval $2309737967, (%esp), %edx +0x8f 0xea 0x68 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89 diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt index 1511347306a7..659ad9051fd5 100644 --- a/test/MC/Disassembler/X86/x86-64.txt +++ b/test/MC/Disassembler/X86/x86-64.txt @@ -456,3 +456,27 @@ # CHECK: callq -32769 0xe8 0xff 0x7f 0xff 0xff + +# CHECK: llwpcb %rax +0x8f 0xe9 0xf8 0x12 0xc0 + +# CHECK: slwpcb %rax +0x8f 0xe9 0xf8 0x12 0xc8 + +# CHECK: lwpins $305419896, %ebx, %rax +0x8f 0xea 0xf8 0x12 0xc3 0x78 0x56 0x34 0x12 + +# CHECK: lwpins $591751049, (%rsp), %rdx +0x8f 0xea 0xe8 0x12 0x04 0x24 0x89 0x67 0x45 0x23 + +# CHECK: lwpins $591751049, (%esp), %edx +0x67 0x8f 0xea 0x68 0x12 0x04 0x24 0x89 0x67 0x45 0x23 + +# CHECK: lwpval $1737075661, %ebx, %rax +0x8f 0xea 0xf8 0x12 0xcb 0xcd 0xab 0x89 0x67 + +# CHECK: lwpval $2309737967, (%rsp), %rdx +0x8f 0xea 0xe8 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89 + +# CHECK: lwpval $2309737967, (%esp), %edx +0x67 0x8f 0xea 0x68 0x12 0x0c 0x24 0xef 0xcd 0xab 0x89 diff --git a/test/MC/Hexagon/PacketRules/endloop_branches.s b/test/MC/Hexagon/PacketRules/endloop_branches.s new file mode 100644 index 000000000000..fbaa246c0684 --- /dev/null +++ b/test/MC/Hexagon/PacketRules/endloop_branches.s @@ -0,0 +1,12 @@ +# RUN: not llvm-mc -triple=hexagon -filetype=asm %s 2>&1 | FileCheck %s + +# Check that a branch in an end-loop packet is caught. 
+ +{ jump unknown +}:endloop0 +# CHECK: 5:3: error: packet marked with `:endloop0' cannot contain instructions that modify register + +{ jump unknown +}:endloop1 + +# CHECK: 9:3: error: packet marked with `:endloop1' cannot contain instructions that modify register diff --git a/test/MC/Hexagon/PacketRules/restrict_ax.s b/test/MC/Hexagon/PacketRules/restrict_ax.s new file mode 100644 index 000000000000..b8f7a1f782c3 --- /dev/null +++ b/test/MC/Hexagon/PacketRules/restrict_ax.s @@ -0,0 +1,4 @@ +{ r0=memw_locked(r0) + r1=-mpyi(r0,#0) } +# RUN: not llvm-mc -arch=hexagon -filetype=asm %s 2>%t; FileCheck %s --check-prefix=CHECK00 <%t +# CHECK00: 1:3: error: Instruction can only be in a packet with ALU or non-FPU XTYPE instructions diff --git a/test/MC/Hexagon/dealloc-return-jump.s b/test/MC/Hexagon/dealloc-return-jump.s new file mode 100644 index 000000000000..0d480bef85d2 --- /dev/null +++ b/test/MC/Hexagon/dealloc-return-jump.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc -arch=hexagon -mcpu=hexagonv62 -filetype=obj -o - %s +# Check that a duplex involving dealloc_return is correctly checked +# dealloc_return cannot be involved in a double jump packet + +{ r0=add(r0,#-1) + p0=cmp.eq(r0,r0); if (p0.new) jump:nt 0 + if (p0) dealloc_return } diff --git a/test/MC/Hexagon/endloop.s b/test/MC/Hexagon/endloop.s deleted file mode 100644 index d537eb00ed05..000000000000 --- a/test/MC/Hexagon/endloop.s +++ /dev/null @@ -1,19 +0,0 @@ -# RUN: not llvm-mc -triple=hexagon -filetype=asm %s 2>&1 | FileCheck %s - -# Check that a branch in an end-loop packet is caught. - -1: -{ - r0 = #1 - p0 = cmp.eq (r1, r2) - if (p0) jump 1b -}:endloop0 - -2: -{ - r0 = #1 - p0 = cmp.eq (r1, r2) - if (p0) jump 2b -}:endloop1 - -# CHECK: rror: packet marked with `:endloop{{.}}' cannot contain instructions that modify register diff --git a/test/MC/Hexagon/iconst.s b/test/MC/Hexagon/iconst.s index 917cc64ba953..156d1abe8cb4 100644 --- a/test/MC/Hexagon/iconst.s +++ b/test/MC/Hexagon/iconst.s @@ -2,5 +2,5 @@ a: # CHECK: r0 = add(r0,#0) -# CHECK: R_HEX_23_REG +# CHECK: R_HEX_27_REG r0 = iconst(#a) diff --git a/test/MC/Hexagon/plt-rel.s b/test/MC/Hexagon/plt-rel.s new file mode 100644 index 000000000000..cba3d7e9ef2e --- /dev/null +++ b/test/MC/Hexagon/plt-rel.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc -arch=hexagon -filetype=obj %s | llvm-objdump -d -r - | FileCheck %s + +call foo@GDPLT +# CHECK: R_HEX_GD_PLT_B22_PCREL +call ##foo@GDPLT +# CHECK: R_HEX_GD_PLT_B32_PCREL_X +# CHECK-NEXT: R_HEX_GD_PLT_B22_PCREL_X + +call foo@LDPLT +# CHECK: R_HEX_LD_PLT_B22_PCREL +call ##foo@LDPLT +# CHECK: R_HEX_LD_PLT_B32_PCREL_X +# CHECK-NEXT: R_HEX_LD_PLT_B22_PCREL_X diff --git a/test/MC/Hexagon/solo-axok.s b/test/MC/Hexagon/solo-axok.s new file mode 100644 index 000000000000..2df5796e628d --- /dev/null +++ b/test/MC/Hexagon/solo-axok.s @@ -0,0 +1,9 @@ +# RUN: not llvm-mc -arch=hexagon -filetype=asm -mcpu=hexagonv55 %s 2>%t; FileCheck %s < %t +# +{ + sp=asrh(r6) + l2fetch(fp,r23:22) + p2=r7 + p1=dfclass(r31:30,#6) +} +# CHECK: rror: Instruction can only diff --git a/test/MC/X86/lwp-x86_64.s b/test/MC/X86/lwp-x86_64.s new file mode 100644 index 000000000000..92f15967461e --- /dev/null +++ b/test/MC/X86/lwp-x86_64.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK + +llwpcb %rcx +# CHECK: llwpcb %rcx +# CHECK: encoding: [0x8f,0xe9,0xf8,0x12,0xc1] + +slwpcb %rax +# CHECK: slwpcb %rax +# CHECK: encoding: [0x8f,0xe9,0xf8,0x12,0xc8] + +lwpins $305419896, %ebx, %rax +# CHECK: lwpins $305419896, %ebx, 
%rax +# CHECK: encoding: [0x8f,0xea,0xf8,0x12,0xc3,0x78,0x56,0x34,0x12] + +lwpins $591751049, (%rsp), %rdx +# CHECK: lwpins $591751049, (%rsp), %rdx +# CHECK: encoding: [0x8f,0xea,0xe8,0x12,0x04,0x24,0x89,0x67,0x45,0x23] + +lwpval $1737075661, %ebx, %rax +# CHECK: lwpval $1737075661, %ebx, %rax +# CHECK: encoding: [0x8f,0xea,0xf8,0x12,0xcb,0xcd,0xab,0x89,0x67] + +lwpval $2309737967, (%rsp), %rdx +# CHECK: lwpval $2309737967, (%rsp), %rdx +# CHECK: encoding: [0x8f,0xea,0xe8,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89] diff --git a/test/MC/X86/lwp.s b/test/MC/X86/lwp.s new file mode 100644 index 000000000000..43d6f2cd7e3b --- /dev/null +++ b/test/MC/X86/lwp.s @@ -0,0 +1,32 @@ +# RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X86 +# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-X64 + +llwpcb %ecx +# CHECK: llwpcb %ecx +# CHECK-X86: encoding: [0x8f,0xe9,0x78,0x12,0xc1] +# CHECK-X64: encoding: [0x8f,0xe9,0x78,0x12,0xc1] + +slwpcb %eax +# CHECK: slwpcb %eax +# CHECK-X86: encoding: [0x8f,0xe9,0x78,0x12,0xc8] +# CHECK-X64: encoding: [0x8f,0xe9,0x78,0x12,0xc8] + +lwpins $305419896, %ebx, %eax +# CHECK: lwpins $305419896, %ebx, %eax +# CHECK-X86: encoding: [0x8f,0xea,0x78,0x12,0xc3,0x78,0x56,0x34,0x12] +# CHECK-X64: encoding: [0x8f,0xea,0x78,0x12,0xc3,0x78,0x56,0x34,0x12] + +lwpins $591751049, (%esp), %edx +# CHECK: lwpins $591751049, (%esp), %edx +# CHECK-X86: encoding: [0x8f,0xea,0x68,0x12,0x04,0x24,0x89,0x67,0x45,0x23] +# CHECK-X64: encoding: [0x67,0x8f,0xea,0x68,0x12,0x04,0x24,0x89,0x67,0x45,0x23] + +lwpval $1737075661, %ebx, %eax +# CHECK: lwpval $1737075661, %ebx, %eax +# CHECK-X86: encoding: [0x8f,0xea,0x78,0x12,0xcb,0xcd,0xab,0x89,0x67] +# CHECK-X64: encoding: [0x8f,0xea,0x78,0x12,0xcb,0xcd,0xab,0x89,0x67] + +lwpval $2309737967, (%esp), %edx +# CHECK: lwpval $2309737967, (%esp), %edx +# CHECK-X86: encoding: [0x8f,0xea,0x68,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89] +# CHECK-X64: encoding: [0x67,0x8f,0xea,0x68,0x12,0x0c,0x24,0xef,0xcd,0xab,0x89] diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll index 6b50e2b4f2fc..b7f87fe1db0e 100644 --- a/test/Transforms/BBVectorize/simple-int.ll +++ b/test/Transforms/BBVectorize/simple-int.ll @@ -503,4 +503,4 @@ define i64 @testcttzneg(i64 %A1, i64 %A2, i64 %B1, i64 %B2) { ; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0 ; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0 ; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0 -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } diff --git a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll index c8808182f717..b2442b8b173c 100644 --- a/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll +++ b/test/Transforms/CodeExtractor/PartialInlineOptRemark.ll @@ -64,6 +64,22 @@ bb2: ; preds = %bb1, %bb ret i32 %tmp3, !dbg !19 } +define i32 @bar_cold(i32 %arg) local_unnamed_addr #3 !dbg !5 { +bb: + %tmp = icmp slt i32 %arg, 0, !dbg !7 + br i1 %tmp, label %bb1, label %bb2, !dbg !8 + +bb1: ; preds = %bb + tail call void (...) @foo() #0, !dbg !9 + tail call void (...) @foo() #0, !dbg !10 + tail call void (...) 
@foo() #0, !dbg !11 + br label %bb2, !dbg !18 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ] + ret i32 %tmp3, !dbg !19 +} + ; Function Attrs: nounwind declare void @foo(...) local_unnamed_addr #0 @@ -73,16 +89,19 @@ bb: ; CHECK:remark{{.*}}bar partially inlined into dummy_caller ; CHECK-NOT:remark{{.*}}bar_noinline partially inlined into dummy_caller ; CHECK-NOT:remark{{.*}}bar_alwaysinline partially inlined into dummy_caller +; CHECK-NOT:remark{{.*}}bar_cold partially inlined into dummy_caller ; LIMIT-NOT:remark{{.*}}bar partially inlined into dummy_caller %tmp = tail call i32 @bar(i32 %arg), !dbg !21 %tmp2 = tail call i32 @bar_noinline(i32 %arg), !dbg !21 %tmp3 = tail call i32 @bar_alwaysinline(i32 %arg), !dbg !21 + %tmp4 = tail call i32 @bar_cold(i32 %arg), !dbg !21 ret i32 %tmp, !dbg !22 } attributes #0 = { nounwind } attributes #1 = { noinline nounwind } attributes #2 = { alwaysinline nounwind } +attributes #3 = { cold nounwind } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll index dfa999e1b34f..aec00e81bb48 100644 --- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ -39,7 +39,7 @@ bb2: ; preds = %bb1, %bb declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { nounwind ssp } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind readnone speculatable } ; CHECK: attributes #2 = { noinline nounwind ssp } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll index 988557e27152..3728a7179724 100644 --- a/test/Transforms/FunctionAttrs/readattrs.ll +++ b/test/Transforms/FunctionAttrs/readattrs.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -functionattrs -S | FileCheck %s ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s @x = global i32 0 @@ -68,22 +69,22 @@ entry: } ; CHECK: declare void @llvm.masked.scatter -declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>) ; CHECK-NOT: readnone ; CHECK-NOT: readonly ; CHECK: define void @test9 define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { - call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>) + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>) ret void } ; CHECK: declare <4 x i32> @llvm.masked.gather -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) ; CHECK: readonly ; CHECK: define <4 x i32> @test10 define <4 x i32> @test10(<4 x i32*> %ptrs) { - %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef) + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef) ret <4 x i32> %res } diff --git a/test/Transforms/FunctionImport/unnamed-globals.ll b/test/Transforms/FunctionImport/unnamed-globals.ll deleted file mode 100644 
index 167fad28f439..000000000000 --- a/test/Transforms/FunctionImport/unnamed-globals.ll +++ /dev/null @@ -1,10 +0,0 @@ -; Make sure we don't crash when referencing an unnamed global. -; RUN: opt %s -module-summary-analysis -S - -@0 = external global [1 x { i64 }] - -define internal void @tinkywinky() { - call void @patatino(i64 ptrtoint ([1 x { i64 }]* @0 to i64), i64 4) - ret void -} -declare void @patatino(i64, i64) diff --git a/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll index 3f8fdcc8eafb..5b10a1bfc79e 100644 --- a/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll +++ b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s -declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) -declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) +declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) ; This test ensures that masked scatter and gather operations, which take vectors of pointers, ; do not have pointer aliasing ignored when being processed. @@ -20,18 +20,18 @@ entry: %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0 %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1 ; Read from in1 and in2 - %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 - %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in1 to the allocas - call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); ; Read in1 from the allocas ; This gather should alias the scatter we just saw - %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in2 to the allocas - call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); ; Read in2 from the allocas ; This gather should alias the scatter we just saw, and not be eliminated - %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in2 to out for good measure %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0 %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1 diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index b9e208440581..66ab7f48aeff 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ 
b/test/Transforms/InstCombine/intrinsics.ll @@ -284,7 +284,7 @@ define i32 @cttz(i32 %a) { define i1 @cttz_knownbits(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #0 +; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) ; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 4 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -307,7 +307,7 @@ define i8 @ctlz(i8 %a) { define i1 @ctlz_knownbits(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits( ; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32 -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #0 +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) ; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 4 ; CHECK-NEXT: ret i1 [[RES]] ; diff --git a/test/Transforms/InstCombine/masked_intrinsics.ll b/test/Transforms/InstCombine/masked_intrinsics.ll index ce79ce56b5cb..d5403d17ddc2 100644 --- a/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/test/Transforms/InstCombine/masked_intrinsics.ll @@ -2,8 +2,8 @@ declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask) -declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru) -declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru) +declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) { %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru) @@ -49,7 +49,7 @@ define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) { } define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) { - %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru) + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru) ret <2 x double> %res ; CHECK-LABEL: @gather_zeromask( @@ -57,7 +57,7 @@ define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru } define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) { - call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer) + call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer) ret void ; CHECK-LABEL: @scatter_zeromask( diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll index 52175f1b1247..82db192ed801 100644 --- a/test/Transforms/InstCombine/pow-sqrt.ll +++ b/test/Transforms/InstCombine/pow-sqrt.ll @@ -6,7 +6,7 @@ define double @pow_half(double %x) { } ; CHECK-LABEL: define double @pow_half( -; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) +; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #1 ; CHECK-NEXT: ret double %sqrt define double @pow_neghalf(double %x) { @@ -15,8 +15,11 @@ define double @pow_neghalf(double %x) { } ; CHECK-LABEL: define double @pow_neghalf( 
-; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #0 +; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #1 ; CHECK-NEXT: %sqrtrecip = fdiv fast double 1.000000e+00, %sqrt ; CHECK-NEXT: ret double %sqrtrecip -declare double @llvm.pow.f64(double, double) +declare double @llvm.pow.f64(double, double) #0 + +attributes #0 = { nounwind readnone speculatable } +attributes #1 = { nounwind readnone } diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll index 812305d8e489..adcca8480594 100644 --- a/test/Transforms/InstCombine/sub-xor.ll +++ b/test/Transforms/InstCombine/sub-xor.ll @@ -27,7 +27,7 @@ declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone define i32 @test2(i32 %x) nounwind { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) #0 +; CHECK-NEXT: [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[COUNT]], 31 ; CHECK-NEXT: ret i32 [[SUB]] ; diff --git a/test/Transforms/LoopDeletion/unreachable-loops.ll b/test/Transforms/LoopDeletion/unreachable-loops.ll new file mode 100644 index 000000000000..147a85670121 --- /dev/null +++ b/test/Transforms/LoopDeletion/unreachable-loops.ll @@ -0,0 +1,336 @@ +; RUN: opt < %s -loop-deletion -verify-dom-info -S | FileCheck %s + +; Checking that we can delete loops that are never executed. +; We do not change the constant conditional branch statement (where the not-taken target +; is the loop) to an unconditional one. + +; delete the infinite loop because it is never executed. +define void @test1(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test1 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-NOT: bb: +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; FIXME: We can delete this infinite loop. Currently we do not, +; because the infinite loop has no exit block. +define void @test2(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test2 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-LABEL: bb: +; CHECK: br label %bb +entry: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br label %bb + +return: + ret void +} + +; There are multiple exiting blocks and a single exit block. +; Since it is a never executed loop, we do not care about the values +; from different exiting paths and we can +; delete the loop. 
+define i64 @test3(i64 %n, i64 %m, i64 %maybe_zero) nounwind { + +; CHECK-NOT: bb: +; CHECK-NOT: bb2: +; CHECK-NOT: bb3: +; CHECK-LABEL: return.loopexit: +; CHECK-NEXT: %x.lcssa.ph = phi i64 [ undef, %bb.preheader ] +; CHECK-NEXT: br label %return +; CHECK-LABEL: return: +; CHECK-NEXT: %x.lcssa = phi i64 [ 20, %entry ], [ %x.lcssa.ph, %return.loopexit ] +; CHECK-NEXT: ret i64 %x.lcssa +entry: + br i1 false, label %bb, label %return + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + br i1 %t1, label %bb2, label %return + +bb2: + %t2 = icmp slt i64 %x.0, %m + %unused1 = udiv i64 42, %maybe_zero + br i1 %t2, label %bb3, label %return + +bb3: + %t3 = icmp slt i64 %x.0, %m + %unused2 = sdiv i64 42, %maybe_zero + br i1 %t3, label %bb, label %return + +return: +; the only valid value fo x.lcssa is 20. + %x.lcssa = phi i64 [ 12, %bb ], [ 14, %bb2 ], [ 16, %bb3 ], [20, %entry ] + ret i64 %x.lcssa +} + +; Cannot delete the loop, since it may be executed at runtime. +define void @test4(i64 %n, i64 %m, i1 %cond) { +; CHECK-LABEL: test4 +; CHECK-LABEL: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 false, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; multiple constant conditional branches with loop not-taken in all cases. +define void @test5(i64 %n, i64 %m, i1 %cond) nounwind { +; CHECK-LABEL: test5 +; CHECK-LABEL: looppred1: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-LABEL: looppred2: +; CHECK-NEXT: br i1 true, label %return, label %bb.preheader +; CHECK-NOT: bb: +entry: + br i1 %cond, label %looppred1, label %looppred2 + +looppred1: + br i1 true, label %return, label %bb + +looppred2: + br i1 true, label %return, label %bb + +bb: + %x.0 = phi i64 [ 0, %looppred1 ], [ 1, %looppred2 ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +; Don't delete this infinite loop because the loop +; is executable at runtime. +define void @test6(i64 %n, i64 %m) nounwind { +; CHECK-LABEL: test6 +; CHECK-LABEL: entry: +; CHECK-NEXT: br i1 true, label %bb.preheader, label %bb.preheader +; CHECK: bb: +entry: + br i1 true, label %bb, label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ 0, %entry ], [ %t0, %bb ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + %t3 = icmp sgt i64 %x.0, %m + %t4 = and i1 %t1, %t3 + br i1 true, label %bb, label %return + +return: + ret void +} + +declare i64 @foo(i64) +; The loop L2 is never executed and is a subloop, with an +; exit block that branches back to parent loop. +; Here we can delete loop L2, while L1 still exists. 
+define i64 @test7(i64 %n) { +; CHECK-LABEL: test7 +; CHECK-LABEL: L1: +; CHECK: br i1 true, label %L1Latch, label %L2.preheader +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L1Latch.loopexit +; CHECK-LABEL: L1Latch.loopexit: +; CHECK: br label %L1Latch +; CHECK-LABEL: L1Latch: +; CHECK-NEXT: %y = phi i64 [ %y.next, %L1 ], [ %y.L2.lcssa, %L1Latch.loopexit ] +; CHECK: br i1 %cond2, label %exit, label %L1 +entry: + br label %L1 + +L1: + %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ] + br i1 true, label %L1Latch, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1Latch + +L1Latch: + %y = phi i64 [ %y.next, %L1 ], [ %y.L2, %L2 ] + %y.add = add i64 %y, %n + %cond2 = icmp eq i64 %y.add, 42 + br i1 %cond2, label %exit, label %L1 + +exit: + ret i64 %y.add +} + + +; Show recursive deletion of loops. Since we start with subloops and progress outward +; to parent loop, we first delete the loop L2. Now loop L1 becomes a non-loop since it's backedge +; from L2's preheader to L1's exit block is never taken. So, L1 gets deleted as well. +define void @test8(i64 %n) { +; CHECK-LABEL: test8 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L1 + +exit: + ret void +} + + +; Delete a loop (L2) which has subloop (L3). +; Here we delete loop L2, but leave L3 as is. +; FIXME: Can delete L3 as well, by iteratively going backward through the single +; predecessor of L3 until we reach L1's block that guarantees L3 is never +; executed. +define void @test9(i64 %n) { +; CHECK-LABEL: test9 +; CHECK-LABEL: L2.preheader: +; CHECK-NEXT: br label %L3.preheader +; CHECK-NOT: L2: +; CHECK-LABEL: L3.preheader: +; CHECK-NEXT: %y.L2.lcssa = phi i64 [ undef, %L2.preheader ] +; CHECK-NEXT: br label %L3 +; CHECK-LABEL: L3: +; CHECK: br i1 %cond2, label %L3, label %L1.loopexit +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L2 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L2, label %L3 + +L3: + %cond2 = icmp slt i64 %y.L2, %n + br i1 %cond2, label %L3, label %L1 + +exit: + ret void +} + +; We cannot delete L3 because of call within it. +; Since L3 is not deleted, and entirely contained within L2, L2 is also not +; deleted. +; FIXME: We can delete unexecutable loops having +; subloops contained entirely within them. +define void @test10(i64 %n) { +; CHECK-LABEL: test10 +; CHECK: L2: +; CHECK: L3: +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %dummy = call i64 @foo(i64 %y.L3.next) + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} + +; same as test10, but L3 does not contain call. +; So, in the first iteration, all statements of L3 are made invariant, and L3 is +; deleted. 
+; In the next iteration, since L2 is never executed and has no subloops, we delete +; L2 as well. Finally, the outermost loop L1 is deleted. +define void @test11(i64 %n) { +; CHECK-LABEL: test11 +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %exit +; CHECK-LABEL: exit: +; CHECK-NEXT: ret void +entry: + br label %L1 + +L1: + br i1 true, label %exit, label %L2 + +L2: + %x = phi i64 [ 0, %L1 ], [ %x.next, %L3 ] + %x.next = add i64 %x, 1 + %y.L2 = call i64 @foo(i64 %x.next) + %cond = icmp slt i64 %x.next, %n + br i1 %cond, label %L1, label %L3 + +L3: + %y.L3 = phi i64 [ %y.L2, %L2 ], [ %y.L3.next, %L3 ] + %y.L3.next = add i64 %y.L3, 1 + %cond2 = icmp slt i64 %y.L3, %n + br i1 %cond2, label %L3, label %L2 + +exit: + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 82f2e064a581..e18159f24624 100644 --- a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -36,7 +36,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4 ; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]] -; CHECK-NEXT: call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 2ce357540d0b..8ef59613e646 100644 --- a/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux" ;} ;AVX512-LABEL: @foo1 -;AVX512: llvm.masked.load.v16i32 -;AVX512: llvm.masked.gather.v16f32 -;AVX512: llvm.masked.store.v16f32 +;AVX512: llvm.masked.load.v16i32.p0v16i32 +;AVX512: llvm.masked.gather.v16f32.v16p0f32 +;AVX512: llvm.masked.store.v16f32.p0v16f32 ;AVX512: ret void ; Function Attrs: nounwind uwtable @@ -96,8 +96,8 @@ for.end: ; preds = %for.cond ;AVX512-LABEL: @foo2 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1 -;AVX512: llvm.masked.gather.v16f32 -;AVX512: llvm.masked.scatter.v16f32 +;AVX512: llvm.masked.gather.v16f32.v16p0f32 +;AVX512: llvm.masked.scatter.v16f32.v16p0f32 ;AVX512: ret void define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 { entry: @@ -171,10 +171,10 @@ for.end: ; preds = %for.cond ;AVX512-LABEL: @foo3 ;AVX512: getelementptr inbounds 
%struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1 -;AVX512: llvm.masked.gather.v16f32 +;AVX512: llvm.masked.gather.v16f32.v16p0f32 ;AVX512: fadd <16 x float> ;AVX512: getelementptr inbounds %struct.Out, %struct.Out* %out, <16 x i64> {{.*}}, i32 1 -;AVX512: llvm.masked.scatter.v16f32 +;AVX512: llvm.masked.scatter.v16f32.v16p0f32 ;AVX512: ret void %struct.Out = type { float, float } @@ -233,4 +233,194 @@ for.inc: ; preds = %if.end for.end: ; preds = %for.cond ret void } -declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>) + +; The same as @foo2 but scatter/gather argument is a vecotr of ptrs with addresspace 1 + +;AVX512-LABEL: @foo2_addrspace +;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1 +;AVX512: llvm.masked.gather.v16f32.v16p1f32 +;AVX512: llvm.masked.scatter.v16f32.v16p1f32 +;AVX512: ret void +define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 { +entry: + %in.addr = alloca %struct.In addrspace(1)*, align 8 + %out.addr = alloca float addrspace(1)*, align 8 + %trigger.addr = alloca i32*, align 8 + %index.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8 + store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32* %index, i32** %index.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 4096 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %3, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8 + %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2 + %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1 + %6 = load float, float addrspace(1)* %b, align 4 + %add = fadd float %6, 5.000000e-01 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8 + %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4 + store float %add, float addrspace(1)* %arrayidx5, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %9 = load i32, i32* %i, align 4 + %inc = add nsw i32 %9, 16 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Same as foo2_addrspace but here only the input has the non-default address space. 
+ +;AVX512-LABEL: @foo2_addrspace2 +;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1 +;AVX512: llvm.masked.gather.v16f32.v16p1f32 +;AVX512: llvm.masked.scatter.v16f32.v16p0f32 +;AVX512: ret void +define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspace(0)* noalias %out, i32* noalias %trigger, i32* noalias %index) { +entry: + %in.addr = alloca %struct.In addrspace(1)*, align 8 + %out.addr = alloca float addrspace(0)*, align 8 + %trigger.addr = alloca i32*, align 8 + %index.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8 + store float addrspace(0)* %out, float addrspace(0)** %out.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32* %index, i32** %index.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 4096 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %3, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8 + %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2 + %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1 + %6 = load float, float addrspace(1)* %b, align 4 + %add = fadd float %6, 5.000000e-01 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load float addrspace(0)*, float addrspace(0)** %out.addr, align 8 + %arrayidx5 = getelementptr inbounds float, float addrspace(0)* %8, i64 %idxprom4 + store float %add, float addrspace(0)* %arrayidx5, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %9 = load i32, i32* %i, align 4 + %inc = add nsw i32 %9, 16 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Same as foo2_addrspace but here only the output has the non-default address space. 
+ +;AVX512-LABEL: @foo2_addrspace3 +;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1 +;AVX512: llvm.masked.gather.v16f32.v16p0f32 +;AVX512: llvm.masked.scatter.v16f32.v16p1f32 +;AVX512: ret void + +define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) { +entry: + %in.addr = alloca %struct.In addrspace(0)*, align 8 + %out.addr = alloca float addrspace(1)*, align 8 + %trigger.addr = alloca i32*, align 8 + %index.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store %struct.In addrspace(0)* %in, %struct.In addrspace(0)** %in.addr, align 8 + store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32* %index, i32** %index.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 4096 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32, i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32*, i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %3 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %3, 0 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32, i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load %struct.In addrspace(0)*, %struct.In addrspace(0)** %in.addr, align 8 + %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %5, i64 %idxprom2 + %b = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %arrayidx3, i32 0, i32 1 + %6 = load float, float addrspace(0)* %b, align 4 + %add = fadd float %6, 5.000000e-01 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8 + %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4 + store float %add, float addrspace(1)* %arrayidx5, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %9 = load i32, i32* %i, align 4 + %inc = add nsw i32 %9, 16 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/test/Transforms/LoopVectorize/X86/scatter_crash.ll index bda4b2454ee2..aff372b562fb 100755 --- a/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -23,11 +23,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 
8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> diff --git a/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll b/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll index a3511c3ae968..b3087c1577ca 100644 --- a/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll +++ b/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll @@ -1,8 +1,8 @@ ; XFAIL: * ; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s -declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) -declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) +declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) ; This test ensures that masked scatter and gather operations, which take vectors of pointers, ; do not have pointer aliasing ignored when being processed. 
@@ -21,18 +21,18 @@ entry: %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0 %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1 ; Read from in1 and in2 - %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 - %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in1 to the allocas - call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); ; Read in1 from the allocas ; This gather should alias the scatter we just saw - %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in2 to the allocas - call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); + call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>); ; Read in2 from the allocas ; This gather should alias the scatter we just saw, and not be eliminated - %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 + %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1 ; Store in2 to out for good measure %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0 %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1 diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index c10c3b1381b7..ad44f9d6fd39 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -3049,6 +3049,6 @@ define void @test67(i8* %x) { !4 = !DIFile(filename: "path/to/file", directory: "/path/to/dir") !5 = !{i32 2, !"Debug Info Version", i32 3} -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } ; CHECK: attributes [[NUW]] = { nounwind } ; CHECK: ![[RELEASE]] = !{} diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index c856706d3f03..93a12a927d89 100644 --- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -105,7 +105,7 @@ declare void @NSLog(i8*, ...) 
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { ssp uwtable } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind readnone speculatable } ; CHECK: attributes #2 = { nonlazybind } ; CHECK: attributes #3 = { noinline ssp uwtable } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll index 923cbe74a567..03b1e837a0ca 100644 --- a/test/Transforms/SLPVectorizer/X86/call.ll +++ b/test/Transforms/SLPVectorizer/X86/call.ll @@ -147,5 +147,5 @@ entry: ; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]] ; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]] -; CHECK: attributes [[ATTR0]] = { nounwind readnone } +; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable } diff --git a/test/Transforms/SpeculativeExecution/spec-other.ll b/test/Transforms/SpeculativeExecution/spec-other.ll new file mode 100644 index 000000000000..65e14b69e9e6 --- /dev/null +++ b/test/Transforms/SpeculativeExecution/spec-other.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -S -speculative-execution \ +; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \ +; RUN: | FileCheck %s + +; CHECK-LABEL: @ifThen_extractvalue( +; CHECK: extractvalue +; CHECK: br i1 true +define void @ifThen_extractvalue() { + br i1 true, label %a, label %b + +a: + %x = extractvalue { i32, i32 } undef, 0 + br label %b + +b: + ret void +} + +; CHECK-LABEL: @ifThen_insertvalue( +; CHECK: insertvalue +; CHECK: br i1 true +define void @ifThen_insertvalue() { + br i1 true, label %a, label %b + +a: + %x = insertvalue { i32, i32 } undef, i32 undef, 0 + br label %b + +b: + ret void +} + diff --git a/test/Transforms/SpeculativeExecution/spec-vector.ll b/test/Transforms/SpeculativeExecution/spec-vector.ll new file mode 100644 index 000000000000..9c64f1fb1005 --- /dev/null +++ b/test/Transforms/SpeculativeExecution/spec-vector.ll @@ -0,0 +1,73 @@ +; RUN: opt < %s -S -speculative-execution \ +; RUN: -spec-exec-max-speculation-cost 4 -spec-exec-max-not-hoisted 3 \ +; RUN: | FileCheck %s + +; CHECK-LABEL: @ifThen_extractelement_constindex( +; CHECK: extractelement +; CHECK: br i1 true +define void @ifThen_extractelement_constindex() { + br i1 true, label %a, label %b + +a: + %x = extractelement <4 x i32> undef, i32 0 + br label %b + +b: + ret void +} + +; CHECK-LABEL: @ifThen_extractelement_varindex( +; CHECK: extractelement +; CHECK: br i1 true +define void @ifThen_extractelement_varindex(i32 %idx) { + br i1 true, label %a, label %b + +a: + %x = extractelement <4 x i32> undef, i32 %idx + br label %b + +b: + ret void +} + +; CHECK-LABEL: @ifThen_insertelement_constindex( +; CHECK: insertelement +; CHECK: br i1 true +define void @ifThen_insertelement_constindex() { + br i1 true, label %a, label %b + +a: + %x = insertelement <4 x i32> undef, i32 undef, i32 0 + br label %b + +b: + ret void +} + +; CHECK-LABEL: @ifThen_insertelement_varindex( +; CHECK: insertelement +; CHECK: br i1 true +define void @ifThen_insertelement_varindex(i32 %idx) { + br i1 true, label %a, label %b + +a: + %x = insertelement <4 x i32> undef, i32 undef, i32 %idx + br label %b + +b: + ret void +} + +; CHECK-LABEL: @ifThen_shufflevector( +; CHECK: shufflevector +; CHECK: br i1 true +define void @ifThen_shufflevector() { + br i1 true, label %a, label %b + +a: + %x = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> undef + br label %b + +b: + ret 
void +} diff --git a/test/Verifier/scatter_gather.ll b/test/Verifier/scatter_gather.ll new file mode 100644 index 000000000000..3b1b0ee19fd9 --- /dev/null +++ b/test/Verifier/scatter_gather.ll @@ -0,0 +1,122 @@ +; RUN: not opt -verify < %s 2>&1 | FileCheck %s + +; Mask is not a vector +; CHECK: Intrinsic has incorrect argument type! +define <16 x float> @gather2(<16 x float*> %ptrs, <16 x i1>* %mask, <16 x float> %passthru) { + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1>* %mask, <16 x float> %passthru) + ret <16 x float> %res +} +declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>*, <16 x float>) + +; Mask length != return length +; CHECK: Intrinsic has incorrect argument type! +define <8 x float> @gather3(<8 x float*> %ptrs, <16 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <16 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %res +} +declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <16 x i1>, <8 x float>) + +; Return type is not a vector +; CHECK: Intrinsic has incorrect return type! +define <8 x float>* @gather4(<8 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float>* %res +} +declare <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>) + +; Value type is not a vector +; CHECK: Intrinsic has incorrect argument type! +define <8 x float> @gather5(<8 x float*>* %ptrs, <8 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>* %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %res +} +declare <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>*, i32, <8 x i1>, <8 x float>) + +; Value type is not a vector of pointers +; CHECK: Intrinsic has incorrect argument type! +define <8 x float> @gather6(<8 x float> %ptrs, <8 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %res +} +declare <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float>, i32, <8 x i1>, <8 x float>) + +; Value element type != vector of pointers element +; CHECK: Intrinsic has incorrect argument type! +define <8 x float> @gather7(<8 x double*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %res +} +declare <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x float>) + +; Value length!= vector of pointers length +; CHECK: Intrinsic has incorrect argument type! +define <8 x float> @gather8(<16 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) { + %res = call <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %res +} +declare <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*>, i32, <8 x i1>, <8 x float>) + +; Passthru type doesn't match return type +; CHECK: Intrinsic has incorrect argument type! 
+define <16 x i32> @gather9(<16 x i32*> %ptrs, <16 x i1> %mask, <8 x i32> %passthru) { + %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <8 x i32> %passthru) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <8 x i32>) + +; Mask is not a vector +; CHECK: Intrinsic has incorrect argument type! +define void @scatter2(<16 x float> %value, <16 x float*> %ptrs, <16 x i1>* %mask) { + call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %value, <16 x float*> %ptrs, i32 4, <16 x i1>* %mask) + ret void +} +declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>*) + +; Mask length != value length +; CHECK: Intrinsic has incorrect argument type! +define void @scatter3(<8 x float> %value, <8 x float*> %ptrs, <16 x i1> %mask) { + call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 4, <16 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <16 x i1>) + +; Value type is not a vector +; CHECK: Intrinsic has incorrect argument type! +define void @scatter4(<8 x float>* %value, <8 x float*> %ptrs, <8 x i1> %mask) { + call void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>* %value, <8 x float*> %ptrs, i32 4, <8 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>*, <8 x float*>, i32, <8 x i1>) + +; ptrs is not a vector +; CHECK: Intrinsic has incorrect argument type! +define void @scatter5(<8 x float> %value, <8 x float*>* %ptrs, <8 x i1> %mask) { + call void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float> %value, <8 x float*>* %ptrs, i32 4, <8 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float>, <8 x float*>*, i32, <8 x i1>) + +; Value type is not a vector of pointers +; CHECK: Intrinsic has incorrect argument type! +define void @scatter6(<8 x float> %value, <8 x float> %ptrs, <8 x i1> %mask) { + call void @llvm.masked.scatter.v8f32.v8f32(<8 x float> %value, <8 x float> %ptrs, i32 4, <8 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.v8f32.v8f32(<8 x float>, <8 x float>, i32, <8 x i1>) + +; Value element type != vector of pointers element +; CHECK: Intrinsic has incorrect argument type! +define void @scatter7(<8 x float> %value, <8 x double*> %ptrs, <8 x i1> %mask) { + call void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float> %value, <8 x double*> %ptrs, i32 4, <8 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float>, <8 x double*>, i32, <8 x i1>) + +; Value length!= vector of pointers length +; CHECK: Intrinsic has incorrect argument type! 
+define void @scatter8(<8 x float> %value, <16 x float*> %ptrs, <8 x i1> %mask) { + call void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float> %value, <16 x float*> %ptrs, i32 4, <8 x i1> %mask) + ret void +} +declare void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float>, <16 x float*>, i32, <8 x i1>) + diff --git a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp index 7c680ebb94cf..b38b36532a71 100644 --- a/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp +++ b/tools/llvm-pdbdump/C13DebugFragmentVisitor.cpp @@ -13,8 +13,8 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" using namespace llvm; using namespace llvm::codeview; diff --git a/tools/llvm-pdbdump/Diff.cpp b/tools/llvm-pdbdump/Diff.cpp index 8c02d36044d8..418c2361ac32 100644 --- a/tools/llvm-pdbdump/Diff.cpp +++ b/tools/llvm-pdbdump/Diff.cpp @@ -15,8 +15,8 @@ #include "llvm/DebugInfo/PDB/Native/Formatters.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatProviders.h" @@ -394,11 +394,17 @@ Error DiffStyle::diffStringTable() { StringRef S1, S2; if (I < IdList1.size()) { Id1 = IdList1[I]; - S1 = ST1.getStringForID(*Id1); + if (auto Result = ST1.getStringForID(*Id1)) + S1 = *Result; + else + return Result.takeError(); } if (I < IdList2.size()) { Id2 = IdList2[I]; - S2 = ST2.getStringForID(*Id2); + if (auto Result = ST2.getStringForID(*Id2)) + S2 = *Result; + else + return Result.takeError(); } if (Id1 == Id2 && S1 == S2) continue; @@ -418,10 +424,18 @@ Error DiffStyle::diffStringTable() { std::vector<StringRef> Strings1, Strings2; Strings1.reserve(IdList1.size()); Strings2.reserve(IdList2.size()); - for (auto ID : IdList1) - Strings1.push_back(ST1.getStringForID(ID)); - for (auto ID : IdList2) - Strings2.push_back(ST2.getStringForID(ID)); + for (auto ID : IdList1) { + auto S = ST1.getStringForID(ID); + if (!S) + return S.takeError(); + Strings1.push_back(*S); + } + for (auto ID : IdList2) { + auto S = ST2.getStringForID(ID); + if (!S) + return S.takeError(); + Strings2.push_back(*S); + } SmallVector<StringRef, 64> OnlyP; SmallVector<StringRef, 64> OnlyQ; diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp index f3e28e0b08fc..ec1325ff2335 100644 --- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp +++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp @@ -525,14 +525,17 @@ Error LLVMOutputStyle::dumpStringTable() { DictScope D(P, "String Table"); for (uint32_t I : IS->name_ids()) { - StringRef S = IS->getStringForID(I); - if (!S.empty()) { - llvm::SmallString<32> Str; - Str.append("'"); - Str.append(S); - Str.append("'"); - P.printString(Str); - } + auto ES = IS->getStringForID(I); + if (!ES) + return ES.takeError(); + + if (ES->empty()) + continue; + llvm::SmallString<32> Str; + Str.append("'"); + Str.append(*ES); + Str.append("'"); + P.printString(Str); } return Error::success(); } @@ -688,8 +691,11 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) { const auto &ST = *ExpectedST; for (const auto &E : 
Tpi->getHashAdjusters()) { DictScope DHA(P); - StringRef Name = ST.getStringForID(E.first); - P.printString("Type", Name); + auto Name = ST.getStringForID(E.first); + if (!Name) + return Name.takeError(); + + P.printString("Type", *Name); P.printHex("TI", E.second); } } diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbdump/YAMLOutputStyle.cpp index 807d7f8b82e1..b94b5a4abf37 100644 --- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp +++ b/tools/llvm-pdbdump/YAMLOutputStyle.cpp @@ -233,9 +233,12 @@ Error YAMLOutputStyle::dumpStringTable() { const auto &ST = ExpectedST.get(); for (auto ID : ST.name_ids()) { - StringRef S = ST.getStringForID(ID); - if (!S.empty()) - Obj.StringTable->push_back(S); + auto S = ST.getStringForID(ID); + if (!S) + return S.takeError(); + if (S->empty()) + continue; + Obj.StringTable->push_back(*S); } return Error::success(); } diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp index 642e169613ba..4cdd87620c86 100644 --- a/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -47,9 +47,9 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/DebugInfo/PDB/PDB.h" @@ -424,21 +424,6 @@ cl::list<std::string> InputFilename(cl::Positional, static ExitOnError ExitOnErr; -static uint32_t -getFileChecksumOffset(StringRef FileName, - ModuleDebugFileChecksumFragment &Checksums, - StringTableBuilder &Strings) { - // The offset in the line info record is the offset of the checksum - // entry for the corresponding file. That entry then contains an - // offset into the global string table of the file name. So to - // compute the proper offset to write into the line info record, we - // must first get its offset in the global string table, then ask the - // checksum builder to find the offset in its serialized buffer that - // it mapped that filename string table offset to. - uint32_t StringOffset = Strings.insert(FileName); - return Checksums.mapChecksumOffset(StringOffset); -} - static void yamlToPdb(StringRef Path) { BumpPtrAllocator Allocator; ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = @@ -490,6 +475,8 @@ static void yamlToPdb(StringRef Path) { for (auto F : Info.Features) InfoBuilder.addFeature(F); + auto &Strings = Builder.getStringTableBuilder().getStrings(); + const auto &Dbi = YamlObj.DbiStream.getValueOr(DefaultDbiStream); auto &DbiBuilder = Builder.getDbiBuilder(); DbiBuilder.setAge(Dbi.Age); @@ -516,35 +503,24 @@ static void yamlToPdb(StringRef Path) { // File Checksums must be emitted before line information, because line // info records use offsets into the checksum buffer to reference a file's // source file name. 
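The llvm-pdbdump hunks above all follow from one signature change: getStringForID now returns Expected<StringRef> instead of a bare StringRef, so every caller has to test the result and forward the failure with takeError(). A minimal sketch of that check-and-propagate pattern, using llvm::Expected directly; lookupName and printName are made-up stand-ins for illustration, not the real PDB API:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    using namespace llvm;

    // Stand-in for a lookup that can fail (hypothetical, mirroring the shape
    // of PDBStringTable::getStringForID as used in the hunks above).
    static Expected<StringRef> lookupName(uint32_t ID) {
      if (ID == 1)
        return StringRef("foo");
      return make_error<StringError>("no string at that ID",
                                     inconvertibleErrorCode());
    }

    // Test the Expected like a bool, dereference on success, and hand the
    // Error back to the caller with takeError() on failure.
    static Error printName(uint32_t ID) {
      auto Name = lookupName(ID);
      if (!Name)
        return Name.takeError();
      outs() << *Name << "\n";
      return Error::success();
    }

The same shape appears in Diff.cpp, LLVMOutputStyle.cpp, YAMLOutputStyle.cpp above and in the EXPECT_EXPECTED_EQ test macro further down.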
- auto Checksums = llvm::make_unique<ModuleDebugFileChecksumFragment>(); + auto Checksums = + llvm::make_unique<ModuleDebugFileChecksumFragment>(Strings); auto &ChecksumRef = *Checksums; if (!FLI.FileChecksums.empty()) { - auto &Strings = Builder.getStringTableBuilder(); - for (auto &FC : FLI.FileChecksums) { - uint32_t STOffset = Strings.insert(FC.FileName); - Checksums->addChecksum(STOffset, FC.Kind, FC.ChecksumBytes.Bytes); - } + for (auto &FC : FLI.FileChecksums) + Checksums->addChecksum(FC.FileName, FC.Kind, FC.ChecksumBytes.Bytes); } ModiBuilder.setC13FileChecksums(std::move(Checksums)); - // FIXME: StringTable / StringTableBuilder should really be in - // DebugInfoCodeView. This would allow us to construct the - // ModuleDebugLineFragment with a reference to the string table, - // and we could just pass strings around rather than having to - // remember how to calculate the right offset. - auto &Strings = Builder.getStringTableBuilder(); - for (const auto &Fragment : FLI.LineFragments) { - auto Lines = llvm::make_unique<ModuleDebugLineFragment>(); + auto Lines = + llvm::make_unique<ModuleDebugLineFragment>(ChecksumRef, Strings); Lines->setCodeSize(Fragment.CodeSize); Lines->setRelocationAddress(Fragment.RelocSegment, Fragment.RelocOffset); Lines->setFlags(Fragment.Flags); for (const auto &LC : Fragment.Blocks) { - uint32_t ChecksumOffset = - getFileChecksumOffset(LC.FileName, ChecksumRef, Strings); - - Lines->createBlock(ChecksumOffset); + Lines->createBlock(LC.FileName); if (Lines->hasColumnInfo()) { for (const auto &Item : zip(LC.Lines, LC.Columns)) { auto &L = std::get<0>(Item); @@ -567,18 +543,15 @@ static void yamlToPdb(StringRef Path) { for (const auto &Inlinee : FLI.Inlinees) { auto Inlinees = llvm::make_unique<ModuleDebugInlineeLineFragment>( - Inlinee.HasExtraFiles); + ChecksumRef, Inlinee.HasExtraFiles); for (const auto &Site : Inlinee.Sites) { - uint32_t FileOff = - getFileChecksumOffset(Site.FileName, ChecksumRef, Strings); - - Inlinees->addInlineSite(Site.Inlinee, FileOff, Site.SourceLineNum); + Inlinees->addInlineSite(Site.Inlinee, Site.FileName, + Site.SourceLineNum); if (!Inlinee.HasExtraFiles) continue; for (auto EF : Site.ExtraFiles) { - FileOff = getFileChecksumOffset(EF, ChecksumRef, Strings); - Inlinees->addExtraFile(FileOff); + Inlinees->addExtraFile(EF); } } ModiBuilder.addC13Fragment(std::move(Inlinees)); diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index a7088c1c7419..04386875b95a 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -29,6 +29,7 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" #include "llvm/DebugInfo/CodeView/SymbolDumper.h" @@ -124,7 +125,7 @@ private: StringRef SectionContents, StringRef Block); /// Given a .debug$S section, find the string table and file checksum table. 
- void initializeFileAndStringTables(StringRef Data); + void initializeFileAndStringTables(BinaryStreamReader &Reader); void cacheRelocations(); @@ -145,8 +146,12 @@ private: const llvm::object::COFFObjectFile *Obj; bool RelocCached = false; RelocMapTy RelocMap; - StringRef CVFileChecksumTable; - StringRef CVStringTable; + + BinaryByteStream ChecksumContents; + VarStreamArray<FileChecksumEntry> CVFileChecksumTable; + + BinaryByteStream StringTableContents; + StringTableRef CVStringTable; ScopedPrinter &Writer; TypeDatabase TypeDB; @@ -186,7 +191,7 @@ public: return CD.getFileNameForFileOffset(FileOffset); } - StringRef getStringTable() override { return CD.CVStringTable; } + StringTableRef getStringTable() override { return CD.CVStringTable; } private: COFFDumper &CD; @@ -725,30 +730,35 @@ void COFFDumper::printCodeViewDebugInfo() { } } -void COFFDumper::initializeFileAndStringTables(StringRef Data) { - while (!Data.empty() && (CVFileChecksumTable.data() == nullptr || - CVStringTable.data() == nullptr)) { +void COFFDumper::initializeFileAndStringTables(BinaryStreamReader &Reader) { + while (Reader.bytesRemaining() > 0 && + (!CVFileChecksumTable.valid() || !CVStringTable.valid())) { // The section consists of a number of subsection in the following format: // |SubSectionType|SubSectionSize|Contents...| uint32_t SubType, SubSectionSize; - error(consume(Data, SubType)); - error(consume(Data, SubSectionSize)); - if (SubSectionSize > Data.size()) - return error(object_error::parse_failed); + error(Reader.readInteger(SubType)); + error(Reader.readInteger(SubSectionSize)); + + StringRef Contents; + error(Reader.readFixedString(Contents, SubSectionSize)); + switch (ModuleDebugFragmentKind(SubType)) { - case ModuleDebugFragmentKind::FileChecksums: - CVFileChecksumTable = Data.substr(0, SubSectionSize); - break; - case ModuleDebugFragmentKind::StringTable: - CVStringTable = Data.substr(0, SubSectionSize); + case ModuleDebugFragmentKind::FileChecksums: { + ChecksumContents = BinaryByteStream(Contents, support::little); + BinaryStreamReader CSR(ChecksumContents); + error(CSR.readArray(CVFileChecksumTable, CSR.getLength())); break; + } + case ModuleDebugFragmentKind::StringTable: { + StringTableContents = BinaryByteStream(Contents, support::little); + error(CVStringTable.initialize(StringTableContents)); + } break; default: break; } + uint32_t PaddedSize = alignTo(SubSectionSize, 4); - if (PaddedSize > Data.size()) - error(object_error::parse_failed); - Data = Data.drop_front(PaddedSize); + error(Reader.skip(PaddedSize - SubSectionSize)); } } @@ -771,7 +781,9 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, if (Magic != COFF::DEBUG_SECTION_MAGIC) return error(object_error::parse_failed); - initializeFileAndStringTables(Data); + BinaryByteStream FileAndStrings(Data, support::little); + BinaryStreamReader FSReader(FileAndStrings); + initializeFileAndStringTables(FSReader); // TODO: Convert this over to using ModuleSubstreamVisitor. 
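The rewritten initializeFileAndStringTables above walks the .debug$S payload as a sequence of |SubSectionType|SubSectionSize|Contents...| records, each record's contents padded to a 4-byte boundary (alignTo(SubSectionSize, 4)), and now does so through a BinaryStreamReader instead of slicing a StringRef. A standalone sketch of that framing in plain C++ — an illustration of the record walk only, not the LLVM stream classes, assuming little-endian fields as the code above does:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    static uint32_t readU32(const uint8_t *P) {
      uint32_t V;
      std::memcpy(&V, P, sizeof V);              // assumes a little-endian host for brevity
      return V;
    }

    // Walk |Type|Size|Contents| records; contents are padded to 4 bytes.
    template <typename Callback>
    static void forEachSubsection(const std::vector<uint8_t> &Data, Callback CB) {
      size_t Off = 0;
      while (Off + 8 <= Data.size()) {
        uint32_t Type = readU32(&Data[Off]);
        uint32_t Size = readU32(&Data[Off + 4]);
        Off += 8;
        if (Size > Data.size() - Off)
          return;                                // malformed: size runs past the end
        CB(Type, &Data[Off], Size);              // dispatch on Type (checksums, string table, ...)
        Off += (size_t(Size) + 3) & ~size_t(3);  // skip contents plus padding
      }
    }

    int main() {
      // One record: type 0xF3 (the CodeView string-table kind), size 3, "abc", one pad byte.
      std::vector<uint8_t> Data = {0xF3, 0, 0, 0, 3, 0, 0, 0, 'a', 'b', 'c', 0};
      forEachSubsection(Data, [](uint32_t Type, const uint8_t *, uint32_t Size) {
        std::printf("subsection type 0x%X, %u bytes\n", Type, Size);
      });
    }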
while (!Data.empty()) { @@ -861,11 +873,7 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName, const FrameData *FD; error(SR.readObject(FD)); - if (FD->FrameFunc >= CVStringTable.size()) - error(object_error::parse_failed); - - StringRef FrameFunc = - CVStringTable.drop_front(FD->FrameFunc).split('\0').first; + StringRef FrameFunc = error(CVStringTable.getString(FD->FrameFunc)); DictScope S(W, "FrameData"); W.printHex("RvaStart", FD->RvaStart); @@ -971,10 +979,7 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) { for (auto &FC : Checksums) { DictScope S(W, "FileChecksum"); - if (FC.FileNameOffset >= CVStringTable.size()) - error(object_error::parse_failed); - StringRef Filename = - CVStringTable.drop_front(FC.FileNameOffset).split('\0').first; + StringRef Filename = error(CVStringTable.getString(FC.FileNameOffset)); W.printHex("Filename", Filename, FC.FileNameOffset); W.printHex("ChecksumSize", FC.Checksum.size()); W.printEnum("ChecksumKind", uint8_t(FC.Kind), @@ -1008,23 +1013,16 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) { StringRef COFFDumper::getFileNameForFileOffset(uint32_t FileOffset) { // The file checksum subsection should precede all references to it. - if (!CVFileChecksumTable.data() || !CVStringTable.data()) - error(object_error::parse_failed); - // Check if the file checksum table offset is valid. - if (FileOffset >= CVFileChecksumTable.size()) + if (!CVFileChecksumTable.valid() || !CVStringTable.valid()) error(object_error::parse_failed); - // The string table offset comes first before the file checksum. - StringRef Data = CVFileChecksumTable.drop_front(FileOffset); - uint32_t StringOffset; - error(consume(Data, StringOffset)); + auto Iter = CVFileChecksumTable.at(FileOffset); - // Check if the string table offset is valid. - if (StringOffset >= CVStringTable.size()) + // Check if the file checksum table offset is valid. + if (Iter == CVFileChecksumTable.end()) error(object_error::parse_failed); - // Return the null-terminated string. 
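The new getFileNameForFileOffset above keeps the two-level lookup that the deleted comments spell out: the offset stored in line info indexes the file-checksum subsection, the entry found there begins with an offset into the string table, and that second offset names the file. A compact standalone sketch of the indirection over raw buffers — illustration only; the real code goes through FileChecksumEntry and StringTableRef, and real checksum entries carry more fields than the leading name offset:

    #include <cstdint>
    #include <cstring>
    #include <string>

    static std::string fileNameForOffset(const uint8_t *Checksums, size_t ChecksumsSize,
                                         const char *Strings, size_t StringsSize,
                                         uint32_t FileOffset) {
      if (FileOffset > ChecksumsSize || ChecksumsSize - FileOffset < 4)
        return {};                                   // bad checksum-table offset
      uint32_t NameOff;
      std::memcpy(&NameOff, Checksums + FileOffset, sizeof NameOff);
      if (NameOff >= StringsSize)
        return {};                                   // bad string-table offset
      // Assumes the string buffer is NUL-terminated within bounds.
      return std::string(Strings + NameOff);
    }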
- return CVStringTable.drop_front(StringOffset).split('\0').first; + return error(CVStringTable.getString(Iter->FileNameOffset)); } void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) { diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h index 015692085e5e..840ddbabdc59 100644 --- a/tools/llvm-readobj/llvm-readobj.h +++ b/tools/llvm-readobj/llvm-readobj.h @@ -25,6 +25,11 @@ namespace llvm { LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg); void error(std::error_code EC); void error(llvm::Error EC); + template <typename T> T error(llvm::Expected<T> &&E) { + error(E.takeError()); + return std::move(*E); + } + template <class T> T unwrapOrError(ErrorOr<T> EO) { if (EO) return *EO; diff --git a/tools/llvm-shlib/CMakeLists.txt b/tools/llvm-shlib/CMakeLists.txt index 278158686296..3ebede00cc43 100644 --- a/tools/llvm-shlib/CMakeLists.txt +++ b/tools/llvm-shlib/CMakeLists.txt @@ -50,6 +50,10 @@ endif() target_link_libraries(LLVM PRIVATE ${LIB_NAMES}) +if (LLVM_DYLIB_SYMBOL_VERSIONING) + set_property(TARGET LLVM APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--default-symver") +endif() + if (APPLE) set_property(TARGET LLVM APPEND_STRING PROPERTY LINK_FLAGS diff --git a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 3f0e3dab72bd..a9d0d9ef4e69 100644 --- a/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -10,6 +10,7 @@ #include "DwarfGenerator.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Config/llvm-config.h" @@ -18,8 +19,8 @@ #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/ObjectYAML/DWARFEmitter.h" +#include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" @@ -1191,10 +1192,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) { auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); - - auto &DebugSections = *ErrOrSections; - - DWARFContextInMemory DwarfContext(DebugSections, 8); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); // Verify the number of compile units is correct. uint32_t NumCUs = DwarfContext.getNumCompileUnits(); @@ -1667,6 +1665,13 @@ TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { EXPECT_EQ(DIEs.find(Val2)->second, AbbrevPtrVal2); } +void VerifyError(DWARFContext &DwarfContext, StringRef Error) { + SmallString<1024> Str; + raw_svector_ostream Strm(Str); + EXPECT_FALSE(DwarfContext.verify(Strm, DIDT_All)); + EXPECT_TRUE(Str.str().contains(Error)); +} + TEST(DWARFDebugInfo, TestDwarfVerifyInvalidCURef) { // Create a single compile unit with a single function that has a DW_AT_type // that is CU relative. 
The CU offset is not valid becuase it is larger than @@ -1711,17 +1716,10 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidCURef) { )"; auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); - - auto &DebugSections = *ErrOrSections; - - DWARFContextInMemory DwarfContext(DebugSections, 8); - - std::string str; - raw_string_ostream strm(str); - EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All)); - const char *err = "error: DW_FORM_ref4 CU offset 0x00001234 is invalid " - "(must be less than CU size of 0x0000001a):"; - EXPECT_TRUE(strm.str().find(err) != std::string::npos); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, "error: DW_FORM_ref4 CU offset 0x00001234 is " + "invalid (must be less than CU size of " + "0x0000001a):"); } TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddr) { @@ -1766,17 +1764,9 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRefAddr) { )"; auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); - - auto &DebugSections = *ErrOrSections; - - DWARFContextInMemory DwarfContext(DebugSections, 8); - - std::string str; - raw_string_ostream strm(str); - EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All)); - strm.flush(); - const char *err = "error: DW_FORM_ref_addr offset beyond .debug_info bounds:"; - EXPECT_TRUE(strm.str().find(err) != std::string::npos); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, + "error: DW_FORM_ref_addr offset beyond .debug_info bounds:"); } TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) { @@ -1810,18 +1800,9 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidRanges) { )"; auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); - - auto &DebugSections = *ErrOrSections; - - DWARFContextInMemory DwarfContext(DebugSections, 8); - - std::string str; - raw_string_ostream strm(str); - EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All)); - strm.flush(); - const char *err = "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:"; - EXPECT_TRUE(strm.str().find(err) != std::string::npos); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, + "error: DW_AT_ranges offset is beyond .debug_ranges bounds:"); } TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) { @@ -1855,18 +1836,10 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStmtList) { )"; auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); - - auto &DebugSections = *ErrOrSections; - - DWARFContextInMemory DwarfContext(DebugSections, 8); - - std::string str; - raw_string_ostream strm(str); - EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All)); - strm.flush(); - const char *err = "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: 0x00001000"; - EXPECT_TRUE(strm.str().find(err) != std::string::npos); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError( + DwarfContext, + "error: DW_AT_stmt_list offset is beyond .debug_line bounds: 0x00001000"); } TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStrp) { @@ -1895,17 +1868,278 @@ TEST(DWARFDebugInfo, TestDwarfVerifyInvalidStrp) { )"; auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); ASSERT_TRUE((bool)ErrOrSections); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, + "error: DW_FORM_strp offset beyond .debug_str bounds:"); +} + +TEST(DWARFDebugInfo, 
TestDwarfVerifyInvalidRefAddrBetween) { + // Create a single compile unit with a single function that has a DW_AT_type + // with a valid .debug_info offset, but the offset is between two DIEs. + const char *yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - main + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Code: 0x00000002 + Tag: DW_TAG_subprogram + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref_addr + debug_info: + - Length: + TotalLength: 22 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - AbbrCode: 0x00000002 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000000011 + - AbbrCode: 0x00000000 + Values: + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(StringRef(yamldata)); + ASSERT_TRUE((bool)ErrOrSections); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError( + DwarfContext, + "error: invalid DIE reference 0x00000011. Offset is in between DIEs:"); +} - auto &DebugSections = *ErrOrSections; +TEST(DWARFDebugInfo, TestDwarfVerifyInvalidLineSequence) { + // Create a single compile unit whose line table has a sequence in it where + // the address decreases. + StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + debug_info: + - Length: + TotalLength: 16 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000000000 + debug_line: + - Length: + TotalLength: 68 + Version: 2 + PrologueLength: 34 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - /tmp + Files: + - Name: main.c + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4112 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 4112 + - Opcode: DW_LNS_copy + Data: 4112 + - Opcode: DW_LNS_advance_pc + Data: 18446744073709551600 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 18446744073709551600 + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_TRUE((bool)ErrOrSections); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, "error: .debug_line[0x00000000] row[1] decreases " + "in address from previous row:"); +} - DWARFContextInMemory DwarfContext(DebugSections, 8); +TEST(DWARFDebugInfo, TestDwarfVerifyInvalidLineFileIndex) { + // Create a single compile unit whose line table has a line table row with + // an invalid file index. 
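One arithmetic detail in the TestDwarfVerifyInvalidLineSequence case above: the DW_LNS_advance_pc operand 18446744073709551600 is 2^64 - 16, so under the line table's modulo-2^64 address arithmetic it steps the row address backwards rather than forwards:

    18446744073709551600 = 2^64 - 16
    0x1010 (4112, from DW_LNE_set_address) + (2^64 - 16) = 0x1000   (mod 2^64)

That backwards step is what makes the DW_LNE_end_sequence row decrease in address and produce the error the test expects.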
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + debug_info: + - Length: + TotalLength: 16 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000000000 + debug_line: + - Length: + TotalLength: 61 + Version: 2 + PrologueLength: 34 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - /tmp + Files: + - Name: main.c + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4096 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 4096 + - Opcode: DW_LNS_copy + Data: 4096 + - Opcode: DW_LNS_advance_pc + Data: 16 + - Opcode: DW_LNS_set_file + Data: 5 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 5 + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_TRUE((bool)ErrOrSections); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, "error: .debug_line[0x00000000][1] has invalid " + "file index 5 (valid values are [1,1]):"); +} - std::string str; - raw_string_ostream strm(str); - EXPECT_FALSE(DwarfContext.verify(strm, DIDT_All)); - strm.flush(); - const char *err = "error: DW_FORM_strp offset beyond .debug_str bounds:"; - EXPECT_TRUE(strm.str().find(err) != std::string::npos); +TEST(DWARFDebugInfo, TestDwarfVerifyCUDontShareLineTable) { + // Create a two compile units where both compile units share the same + // DW_AT_stmt_list value and verify we report the error correctly. 
+ StringRef yamldata = R"( + debug_str: + - '' + - /tmp/main.c + - /tmp/foo.c + debug_abbrev: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_stmt_list + Form: DW_FORM_sec_offset + debug_info: + - Length: + TotalLength: 16 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x0000000000000001 + - Value: 0x0000000000000000 + - Length: + TotalLength: 16 + Version: 4 + AbbrOffset: 0 + AddrSize: 8 + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x000000000000000D + - Value: 0x0000000000000000 + debug_line: + - Length: + TotalLength: 60 + Version: 2 + PrologueLength: 34 + MinInstLength: 1 + DefaultIsStmt: 1 + LineBase: 251 + LineRange: 14 + OpcodeBase: 13 + StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ] + IncludeDirs: + - /tmp + Files: + - Name: main.c + DirIdx: 1 + ModTime: 0 + Length: 0 + Opcodes: + - Opcode: DW_LNS_extended_op + ExtLen: 9 + SubOpcode: DW_LNE_set_address + Data: 4096 + - Opcode: DW_LNS_advance_line + SData: 9 + Data: 4096 + - Opcode: DW_LNS_copy + Data: 4096 + - Opcode: DW_LNS_advance_pc + Data: 256 + - Opcode: DW_LNS_extended_op + ExtLen: 1 + SubOpcode: DW_LNE_end_sequence + Data: 256 + )"; + auto ErrOrSections = DWARFYAML::EmitDebugSections(yamldata); + ASSERT_TRUE((bool)ErrOrSections); + DWARFContextInMemory DwarfContext(*ErrOrSections, 8); + VerifyError(DwarfContext, "error: two compile unit DIEs, 0x0000000b and " + "0x0000001f, have the same DW_AT_stmt_list section " + "offset:"); } } // end anonymous namespace diff --git a/unittests/DebugInfo/PDB/ErrorChecking.h b/unittests/DebugInfo/PDB/ErrorChecking.h index 6d4a7de7834a..f284bfd8bb7a 100644 --- a/unittests/DebugInfo/PDB/ErrorChecking.h +++ b/unittests/DebugInfo/PDB/ErrorChecking.h @@ -36,6 +36,18 @@ } \ } +#define EXPECT_EXPECTED_EQ(Val, Exp) \ + { \ + auto Result = Exp; \ + auto E = Result.takeError(); \ + EXPECT_FALSE(static_cast<bool>(E)); \ + if (E) { \ + consumeError(std::move(E)); \ + return; \ + } \ + EXPECT_EQ(Val, *Result); \ + } + #define EXPECT_UNEXPECTED(Exp) \ { \ auto E = Exp.takeError(); \ diff --git a/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp b/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp index 7c4838778e43..249bc4a03b87 100644 --- a/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp +++ b/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp @@ -9,8 +9,8 @@ #include "ErrorChecking.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" @@ -27,13 +27,13 @@ class StringTableBuilderTest : public ::testing::Test {}; TEST_F(StringTableBuilderTest, Simple) { // Create /names table contents. 
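The Simple test in the hunk that follows inserts "foo", "bar", "foo", "baz" and expects the offsets 1, 5, 1, 9. Those numbers are consistent with a string buffer that starts with an empty string at offset 0 and then appends each new string plus its NUL terminator, handing duplicates the offset recorded on first insertion. A toy sketch of just that offset assignment — not the PDB /names format itself, which also wraps the string buffer in a header and hash table:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Assumed buffer layout: "\0" "foo\0" "bar\0" "baz\0" -> offsets 0, 1, 5, 9.
    class ToyStringTable {
      std::string Buf = std::string(1, '\0');            // empty string at offset 0
      std::map<std::string, uint32_t> Offsets{{"", 0}};
    public:
      uint32_t insert(const std::string &S) {
        auto It = Offsets.find(S);
        if (It != Offsets.end())
          return It->second;                             // duplicate: reuse first offset
        auto Off = static_cast<uint32_t>(Buf.size());
        Buf += S;
        Buf += '\0';
        Offsets.emplace(S, Off);
        return Off;
      }
    };

    int main() {
      ToyStringTable T;
      uint32_t A = T.insert("foo");                      // 1
      uint32_t B = T.insert("bar");                      // 5
      uint32_t C = T.insert("foo");                      // 1 again
      uint32_t D = T.insert("baz");                      // 9
      std::cout << A << ' ' << B << ' ' << C << ' ' << D << '\n';
    }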
- StringTableBuilder Builder; + PDBStringTableBuilder Builder; EXPECT_EQ(1U, Builder.insert("foo")); EXPECT_EQ(5U, Builder.insert("bar")); EXPECT_EQ(1U, Builder.insert("foo")); EXPECT_EQ(9U, Builder.insert("baz")); - std::vector<uint8_t> Buffer(Builder.finalize()); + std::vector<uint8_t> Buffer(Builder.calculateSerializedSize()); MutableBinaryByteStream OutStream(Buffer, little); BinaryStreamWriter Writer(OutStream); EXPECT_NO_ERROR(Builder.commit(Writer)); @@ -41,15 +41,16 @@ TEST_F(StringTableBuilderTest, Simple) { // Reads the contents back. BinaryByteStream InStream(Buffer, little); BinaryStreamReader Reader(InStream); - StringTable Table; - EXPECT_NO_ERROR(Table.load(Reader)); + PDBStringTable Table; + EXPECT_NO_ERROR(Table.reload(Reader)); EXPECT_EQ(3U, Table.getNameCount()); EXPECT_EQ(1U, Table.getHashVersion()); - EXPECT_EQ("foo", Table.getStringForID(1)); - EXPECT_EQ("bar", Table.getStringForID(5)); - EXPECT_EQ("baz", Table.getStringForID(9)); - EXPECT_EQ(1U, Table.getIDForString("foo")); - EXPECT_EQ(5U, Table.getIDForString("bar")); - EXPECT_EQ(9U, Table.getIDForString("baz")); + + EXPECT_EXPECTED_EQ("foo", Table.getStringForID(1)); + EXPECT_EXPECTED_EQ("bar", Table.getStringForID(5)); + EXPECT_EXPECTED_EQ("baz", Table.getStringForID(9)); + EXPECT_EXPECTED_EQ(1U, Table.getIDForString("foo")); + EXPECT_EXPECTED_EQ(5U, Table.getIDForString("bar")); + EXPECT_EXPECTED_EQ(9U, Table.getIDForString("baz")); } diff --git a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp index 4af3aa707a90..ab43c4af13f1 100644 --- a/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp +++ b/unittests/ExecutionEngine/Orc/IndirectionUtilsTest.cpp @@ -20,17 +20,15 @@ TEST(IndirectionUtilsTest, MakeStub) { LLVMContext Context; ModuleBuilder MB(Context, "x86_64-apple-macosx10.10", ""); Function *F = MB.createFunctionDecl<void(DummyStruct, DummyStruct)>(""); - SmallVector<AttributeList, 4> Attrs; - Attrs.push_back( - AttributeList::get(MB.getModule()->getContext(), 1U, - AttrBuilder().addAttribute(Attribute::StructRet))); - Attrs.push_back( - AttributeList::get(MB.getModule()->getContext(), 2U, - AttrBuilder().addAttribute(Attribute::ByVal))); - Attrs.push_back( - AttributeList::get(MB.getModule()->getContext(), ~0U, - AttrBuilder().addAttribute(Attribute::NoUnwind))); - F->setAttributes(AttributeList::get(MB.getModule()->getContext(), Attrs)); + AttributeSet FnAttrs = AttributeSet::get( + Context, AttrBuilder().addAttribute(Attribute::NoUnwind)); + AttributeSet RetAttrs; // None + AttributeSet ArgAttrs[2] = { + AttributeSet::get(Context, + AttrBuilder().addAttribute(Attribute::StructRet)), + AttributeSet::get(Context, AttrBuilder().addAttribute(Attribute::ByVal)), + }; + F->setAttributes(AttributeList::get(Context, FnAttrs, RetAttrs, ArgAttrs)); auto ImplPtr = orc::createImplPointer(*F->getType(), *MB.getModule(), "", nullptr); orc::makeStub(*F, *ImplPtr); diff --git a/unittests/IR/AttributesTest.cpp b/unittests/IR/AttributesTest.cpp index 7c3df2e19e8f..0df7a847f8a5 100644 --- a/unittests/IR/AttributesTest.cpp +++ b/unittests/IR/AttributesTest.cpp @@ -45,7 +45,7 @@ TEST(Attributes, Ordering) { AttributeList::get(C, 1, Attribute::SExt)}; AttributeList SetA = AttributeList::get(C, ASs); - AttributeList SetB = SetA.removeAttributes(C, 1, ASs[1]); + AttributeList SetB = SetA.removeAttributes(C, 1, ASs[1].getAttributes(1)); EXPECT_NE(SetA, SetB); } diff --git a/unittests/Support/BinaryStreamTest.cpp b/unittests/Support/BinaryStreamTest.cpp index 
74c51e382d99..41567dad6226 100644 --- a/unittests/Support/BinaryStreamTest.cpp +++ b/unittests/Support/BinaryStreamTest.cpp @@ -358,14 +358,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) { struct StringExtractor { public: - typedef uint32_t ContextType; + typedef uint32_t &ContextType; static Error extract(BinaryStreamRef Stream, uint32_t &Len, StringRef &Item, - uint32_t *Index) { - if (*Index == 0) + uint32_t &Index) { + if (Index == 0) Len = strlen("1. Test"); - else if (*Index == 1) + else if (Index == 1) Len = strlen("2. Longer Test"); - else if (*Index == 2) + else if (Index == 2) Len = strlen("3. Really Long Test"); else Len = strlen("4. Super Extra Longest Test Of All"); @@ -374,14 +374,14 @@ TEST_F(BinaryStreamTest, VarStreamArray) { return EC; Item = StringRef(reinterpret_cast<const char *>(Bytes.data()), Bytes.size()); - ++(*Index); + ++Index; return Error::success(); } }; for (auto &Stream : Streams) { uint32_t Context = 0; - VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, &Context); + VarStreamArray<StringRef, StringExtractor> Array(*Stream.Input, Context); auto Iter = Array.begin(); ASSERT_EQ("1. Test", *Iter++); ASSERT_EQ("2. Longer Test", *Iter++); diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 1fc18a5dd1d8..caa52d28f771 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -211,13 +211,12 @@ enum IIT_Info { IIT_SAME_VEC_WIDTH_ARG = 31, IIT_PTR_TO_ARG = 32, IIT_PTR_TO_ELT = 33, - IIT_VEC_OF_PTRS_TO_ELT = 34, + IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, IIT_V1024 = 37 }; - static void EncodeFixedValueType(MVT::SimpleValueType VT, std::vector<unsigned char> &Sig) { if (MVT(VT).isInteger()) { @@ -273,9 +272,16 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes, } else if (R->isSubClassOf("LLVMPointerTo")) Sig.push_back(IIT_PTR_TO_ARG); - else if (R->isSubClassOf("LLVMVectorOfPointersToElt")) - Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT); - else if (R->isSubClassOf("LLVMPointerToElt")) + else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) { + Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT); + unsigned ArgNo = ArgCodes.size(); + ArgCodes.push_back(3 /*vAny*/); + // Encode overloaded ArgNo + Sig.push_back(ArgNo); + // Encode LLVMMatchType<Number> ArgNo + Sig.push_back(Number); + return; + } else if (R->isSubClassOf("LLVMPointerToElt")) Sig.push_back(IIT_PTR_TO_ELT); else Sig.push_back(IIT_ARG); @@ -557,8 +563,9 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, if (ae) { while (ai != ae) { unsigned argNo = intrinsic.ArgumentAttributes[ai].first; + unsigned attrIdx = argNo + 1; // Must match AttributeList::FirstArgIndex - OS << " const Attribute::AttrKind AttrParam" << argNo + 1 <<"[]= {"; + OS << " const Attribute::AttrKind AttrParam" << attrIdx << "[]= {"; bool addComma = false; do { @@ -599,7 +606,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, } while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo); OS << "};\n"; OS << " AS[" << numAttrs++ << "] = AttributeList::get(C, " - << argNo + 1 << ", AttrParam" << argNo + 1 << ");\n"; + << attrIdx << ", AttrParam" << attrIdx << ");\n"; } } diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py index 689a2d55bcea..10cd7775060f 100755 --- a/utils/lit/lit/main.py +++ b/utils/lit/lit/main.py @@ -161,7 +161,11 @@ def main(builtinParameters = {}): main_with_tmp(builtinParameters) finally: if lit_tmp: - shutil.rmtree(lit_tmp) + 
try: + shutil.rmtree(lit_tmp) + except: + # FIXME: Re-try after timeout on Windows. + pass def main_with_tmp(builtinParameters): parser = argparse.ArgumentParser() diff --git a/utils/lit/lit/run.py b/utils/lit/lit/run.py index 14d8ec98490e..27c7a9e59f8b 100644 --- a/utils/lit/lit/run.py +++ b/utils/lit/lit/run.py @@ -20,6 +20,14 @@ except ImportError: import lit.Test +def abort_now(): + """Abort the current process without doing any exception teardown""" + sys.stdout.flush() + if win32api: + win32api.TerminateProcess(win32api.GetCurrentProcess(), 3) + else: + os.kill(0, 9) + ### # Test Execution Implementation @@ -91,8 +99,7 @@ class Tester(object): # This is a sad hack. Unfortunately subprocess goes # bonkers with ctrl-c and we start forking merrily. print('\nCtrl-C detected, goodbye.') - sys.stdout.flush() - os.kill(0,9) + abort_now() self.consumer.update(test_index, test) class ThreadResultsConsumer(object): @@ -353,7 +360,7 @@ class Run(object): print('\nCtrl-C detected, terminating.') pool.terminate() pool.join() - os.kill(0,9) + abort_now() return True win32api.SetConsoleCtrlHandler(console_ctrl_handler, True) @@ -368,6 +375,10 @@ class Run(object): deadline = time.time() + max_time # Start a process pool. Copy over the data shared between all test runs. + # FIXME: Find a way to capture the worker process stderr. If the user + # interrupts the workers before we make it into our task callback, they + # will each raise a KeyboardInterrupt exception and print to stderr at + # the same time. pool = multiprocessing.Pool(jobs, worker_initializer, (self.lit_config, self.parallelism_semaphores)) @@ -379,6 +390,7 @@ class Run(object): args=(test_index, test), callback=self.consume_test_result) for test_index, test in enumerate(self.tests)] + pool.close() # Wait for all results to come in. The callback that runs in the # parent process will update the display. @@ -395,10 +407,12 @@ class Run(object): a.get() # Exceptions raised here come from the worker. if self.hit_max_failures: break - finally: + except: # Stop the workers and wait for any straggling results to come in # if we exited without waiting on every async result. pool.terminate() + raise + finally: pool.join() # Mark any tests that weren't run as UNRESOLVED. @@ -463,11 +477,7 @@ def worker_run_one_test(test_index, test): execute_test(test, child_lit_config, child_parallelism_semaphores) return (test_index, test) except KeyboardInterrupt as e: - # This is a sad hack. Unfortunately subprocess goes - # bonkers with ctrl-c and we start forking merrily. - print('\nCtrl-C detected, goodbye.') - traceback.print_exc() - sys.stdout.flush() - os.kill(0,9) + # If a worker process gets an interrupt, abort it immediately. + abort_now() except: traceback.print_exc() |