diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-24 15:11:41 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-08 19:04:38 +0000 |
commit | fcaf7f8644a9988098ac6be2165bce3ea4786e91 (patch) | |
tree | 08a554363df16b968a623d651c09d82a5a0b1c65 /contrib/llvm-project/llvm | |
parent | 753f127f3ace09432b2baeffd71a308760641a62 (diff) | |
parent | 4b4fe385e49bd883fd183b5f21c1ea486c722e61 (diff) | |
download | src-fcaf7f8644a9988098ac6be2165bce3ea4786e91.tar.gz src-fcaf7f8644a9988098ac6be2165bce3ea4786e91.zip |
Merge llvm-project main llvmorg-15-init-17485-ga3e38b4a206b
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-15-init-17485-ga3e38b4a206b.
PR: 265425
MFC after: 2 weeks
Diffstat (limited to 'contrib/llvm-project/llvm')
557 files changed, 14255 insertions, 5910 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h index 4155cb260a2a..5bdc1541f630 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/APInt.h @@ -147,7 +147,7 @@ public: APInt(unsigned numBits, StringRef str, uint8_t radix); /// Default constructor that creates an APInt with a 1-bit zero value. - explicit APInt() : BitWidth(1) { U.VAL = 0; } + explicit APInt() { U.VAL = 0; } /// Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { @@ -1824,7 +1824,7 @@ private: uint64_t *pVal; ///< Used to store the >64 bits integer value. } U; - unsigned BitWidth; ///< The number of bits in this APInt. + unsigned BitWidth = 1; ///< The number of bits in this APInt. friend struct DenseMapInfo<APInt, void>; friend class APSInt; diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h b/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h index 7b6af436f577..727d95ed8c1c 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/APSInt.h @@ -21,11 +21,11 @@ namespace llvm { /// An arbitrary precision integer that knows its signedness. class LLVM_NODISCARD APSInt : public APInt { - bool IsUnsigned; + bool IsUnsigned = false; public: /// Default constructor that creates an uninitialized APInt. - explicit APSInt() : IsUnsigned(false) {} + explicit APSInt() = default; /// Create an APSInt with the specified width, default to unsigned. 
explicit APSInt(uint32_t BitWidth, bool isUnsigned = true) diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/AddressRanges.h b/contrib/llvm-project/llvm/include/llvm/ADT/AddressRanges.h index 1953680d5222..c02844a095d1 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/AddressRanges.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/AddressRanges.h @@ -10,9 +10,10 @@ #define LLVM_ADT_ADDRESSRANGES_H #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include <cassert> #include <stdint.h> -#include <vector> namespace llvm { @@ -47,20 +48,29 @@ private: /// The AddressRanges class helps normalize address range collections. /// This class keeps a sorted vector of AddressRange objects and can perform /// insertions and searches efficiently. The address ranges are always sorted -/// and never contain any invalid or empty address ranges. Intersecting +/// and never contain any invalid or empty address ranges. +/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300)) /// address ranges are combined during insertion. 
class AddressRanges { protected: - using Collection = std::vector<AddressRange>; + using Collection = SmallVector<AddressRange>; Collection Ranges; public: void clear() { Ranges.clear(); } bool empty() const { return Ranges.empty(); } - bool contains(uint64_t Addr) const; - bool contains(AddressRange Range) const; - Optional<AddressRange> getRangeThatContains(uint64_t Addr) const; - void insert(AddressRange Range); + bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); } + bool contains(AddressRange Range) const { + return find(Range) != Ranges.end(); + } + Optional<AddressRange> getRangeThatContains(uint64_t Addr) const { + Collection::const_iterator It = find(Addr); + if (It == Ranges.end()) + return None; + + return *It; + } + Collection::const_iterator insert(AddressRange Range); void reserve(size_t Capacity) { Ranges.reserve(Capacity); } size_t size() const { return Ranges.size(); } bool operator==(const AddressRanges &RHS) const { @@ -72,6 +82,64 @@ public: } Collection::const_iterator begin() const { return Ranges.begin(); } Collection::const_iterator end() const { return Ranges.end(); } + +protected: + Collection::const_iterator find(uint64_t Addr) const; + Collection::const_iterator find(AddressRange Range) const; +}; + +/// AddressRangesMap class maps values to the address ranges. +/// It keeps address ranges and corresponding values. If ranges +/// are combined during insertion, then combined range keeps +/// newly inserted value. 
+template <typename T> class AddressRangesMap : protected AddressRanges { +public: + void clear() { + Ranges.clear(); + Values.clear(); + } + bool empty() const { return AddressRanges::empty(); } + bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); } + bool contains(AddressRange Range) const { + return AddressRanges::contains(Range); + } + void insert(AddressRange Range, T Value) { + size_t InputSize = Ranges.size(); + Collection::const_iterator RangesIt = AddressRanges::insert(Range); + if (RangesIt == Ranges.end()) + return; + + // make Values match to Ranges. + size_t Idx = RangesIt - Ranges.begin(); + typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx; + if (InputSize < Ranges.size()) + Values.insert(ValuesIt, T()); + else if (InputSize > Ranges.size()) + Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size()); + assert(Ranges.size() == Values.size()); + + // set value to the inserted or combined range. + Values[Idx] = Value; + } + size_t size() const { + assert(Ranges.size() == Values.size()); + return AddressRanges::size(); + } + Optional<std::pair<AddressRange, T>> + getRangeValueThatContains(uint64_t Addr) const { + Collection::const_iterator It = find(Addr); + if (It == Ranges.end()) + return None; + + return std::make_pair(*It, Values[It - Ranges.begin()]); + } + std::pair<AddressRange, T> operator[](size_t Idx) const { + return std::make_pair(Ranges[Idx], Values[Idx]); + } + +protected: + using ValuesCollection = SmallVector<T>; + ValuesCollection Values; }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h index 9540b3985963..2ba485777816 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/BitVector.h @@ -83,7 +83,7 @@ class BitVector { using Storage = SmallVector<BitWord>; Storage Bits; // Actual bits. - unsigned Size; // Size of bitvector in bits. 
+ unsigned Size = 0; // Size of bitvector in bits. public: using size_type = unsigned; @@ -135,7 +135,7 @@ public: } /// BitVector default ctor - Creates an empty bitvector. - BitVector() : Size(0) {} + BitVector() = default; /// BitVector ctor - Creates a bitvector of specified number of bits. All /// bits are initialized to the specified value. diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/EpochTracker.h b/contrib/llvm-project/llvm/include/llvm/ADT/EpochTracker.h index b46989bc5111..a639d1b5b3ec 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/EpochTracker.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/EpochTracker.h @@ -56,11 +56,11 @@ public: /// make an iterator-invalidating modification. /// class HandleBase { - const uint64_t *EpochAddress; - uint64_t EpochAtCreation; + const uint64_t *EpochAddress = nullptr; + uint64_t EpochAtCreation = UINT64_MAX; public: - HandleBase() : EpochAddress(nullptr), EpochAtCreation(UINT64_MAX) {} + HandleBase() = default; explicit HandleBase(const DebugEpochBase *Parent) : EpochAddress(&Parent->Epoch), EpochAtCreation(Parent->Epoch) {} diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/IntEqClasses.h b/contrib/llvm-project/llvm/include/llvm/ADT/IntEqClasses.h index 84bb58cb736c..9ee8a46be411 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/IntEqClasses.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/IntEqClasses.h @@ -35,11 +35,11 @@ class IntEqClasses { /// NumClasses - The number of equivalence classes when compressed, or 0 when /// uncompressed. - unsigned NumClasses; + unsigned NumClasses = 0; public: /// IntEqClasses - Create an equivalence class mapping for 0 .. N-1. - IntEqClasses(unsigned N = 0) : NumClasses(0) { grow(N); } + IntEqClasses(unsigned N = 0) { grow(N); } /// grow - Increase capacity to hold 0 .. N-1, putting new integers in unique /// equivalence classes. 
diff --git a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h index 9d85a28fbf04..ba4584dc60fa 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/Triple.h @@ -283,22 +283,22 @@ private: std::string Data; /// The parsed arch type. - ArchType Arch; + ArchType Arch{}; /// The parsed subarchitecture type. - SubArchType SubArch; + SubArchType SubArch{}; /// The parsed vendor type. - VendorType Vendor; + VendorType Vendor{}; /// The parsed OS type. - OSType OS; + OSType OS{}; /// The parsed Environment type. - EnvironmentType Environment; + EnvironmentType Environment{}; /// The object format type. - ObjectFormatType ObjectFormat; + ObjectFormatType ObjectFormat{}; public: /// @name Constructors @@ -306,7 +306,7 @@ public: /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. - Triple() : Arch(), SubArch(), Vendor(), OS(), Environment(), ObjectFormat() {} + Triple() = default; explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h index a0f5331fdba5..cd32979b9ea5 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -20,7 +20,6 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class DominatorTree; class Function; class LoopInfo; diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h b/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h index 231d3bbf534b..a23b64ef20cc 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/IVDescriptors.h @@ -215,9 +215,6 @@ 
public: /// Returns true if the recurrence kind is a floating point kind. static bool isFloatingPointRecurrenceKind(RecurKind Kind); - /// Returns true if the recurrence kind is an arithmetic kind. - static bool isArithmeticRecurrenceKind(RecurKind Kind); - /// Returns true if the recurrence kind is an integer min/max kind. static bool isIntMinMaxRecurrenceKind(RecurKind Kind) { return Kind == RecurKind::UMin || Kind == RecurKind::UMax || diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index 4c5083f3c980..a323cacdbcdc 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -108,8 +108,9 @@ private: /// Return true if the indexed reference is 'consecutive' in loop \p L. /// An indexed reference is 'consecutive' if the only coefficient that uses /// the loop induction variable is the rightmost one, and the access stride is - /// smaller than the cache line size \p CLS. - bool isConsecutive(const Loop &L, unsigned CLS) const; + /// smaller than the cache line size \p CLS. Provide a valid \p Stride value + /// if the indexed reference is 'consecutive'. + bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const; /// Retrieve the index of the subscript corresponding to the given loop \p /// L. Return a zero-based positive index if the subscript index is diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h index 9351b83ad747..5a4f8f143093 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/LoopInfo.h @@ -814,12 +814,15 @@ public: /// by one each time through the loop. bool isCanonical(ScalarEvolution &SE) const; - /// Return true if the Loop is in LCSSA form. 
- bool isLCSSAForm(const DominatorTree &DT) const; - - /// Return true if this Loop and all inner subloops are in LCSSA form. - bool isRecursivelyLCSSAForm(const DominatorTree &DT, - const LoopInfo &LI) const; + /// Return true if the Loop is in LCSSA form. If \p IgnoreTokens is set to + /// true, token values defined inside loop are allowed to violate LCSSA form. + bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens = true) const; + + /// Return true if this Loop and all inner subloops are in LCSSA form. If \p + /// IgnoreTokens is set to true, token values defined inside loop are allowed + /// to violate LCSSA form. + bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, + bool IgnoreTokens = true) const; /// Return true if the Loop is in the form that the LoopSimplify form /// transforms loops to, which is sometimes called normal form. diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h index 7ad83612880f..422f63db749f 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -65,14 +65,13 @@ bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI); /// allocates memory (either malloc, calloc, or strdup like). bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI); -/// Tests if a value is a call or invoke to a library function that -/// reallocates memory (e.g., realloc). -bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI); - /// Tests if a function is a call or invoke to a library function that /// reallocates memory (e.g., realloc). bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); +/// If this is a call to a realloc function, return the reallocated operand. 
+Value *getReallocatedOperand(const CallBase *CB, const TargetLibraryInfo *TLI); + //===----------------------------------------------------------------------===// // free Call Utility Functions. // @@ -80,26 +79,23 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); /// isLibFreeFunction - Returns true if the function is a builtin free() bool isLibFreeFunction(const Function *F, const LibFunc TLIFn); -/// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); - -inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { - return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI)); -} +/// If this is a call to a free function, return the freed operand. +Value *getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI); //===----------------------------------------------------------------------===// // Properties of allocation functions // -/// Return false if the allocation can have side effects on the program state -/// we are required to preserve beyond the effect of allocating a new object. +/// Return true if this is a call to an allocation function that does not have +/// side effects that we are required to preserve beyond the effect of +/// allocating a new object. /// Ex: If our allocation routine has a counter for the number of objects /// allocated, and the program prints it on exit, can the value change due /// to optimization? Answer is highly language dependent. /// Note: *Removable* really does mean removable; it does not mean observable. /// A language (e.g. C++) can allow removing allocations without allowing /// insertion or speculative execution of allocation routines. 
-bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI); +bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI); /// Gets the alignment argument for an aligned_alloc-like function, using either /// built-in knowledge based on function names/signatures or allocalign @@ -107,13 +103,16 @@ bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI); /// the definition of the allocalign attribute. Value *getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI); -/// Return the size of the requested allocation. With a trivial mapper, this is -/// identical to calling getObjectSize(..., Exact). A mapper function can be -/// used to replace one Value* (operand to the allocation) with another. This -/// is useful when doing abstract interpretation. -Optional<APInt> getAllocSize(const CallBase *CB, - const TargetLibraryInfo *TLI, - std::function<const Value*(const Value*)> Mapper); +/// Return the size of the requested allocation. With a trivial mapper, this is +/// similar to calling getObjectSize(..., Exact), but without looking through +/// calls that return their argument. A mapper function can be used to replace +/// one Value* (operand to the allocation) with another. This is useful when +/// doing abstract interpretation. +Optional<APInt> getAllocSize( + const CallBase *CB, const TargetLibraryInfo *TLI, + function_ref<const Value *(const Value *)> Mapper = [](const Value *V) { + return V; + }); /// If this is a call to an allocation function that initializes memory to a /// fixed value, return said value in the requested type. 
Otherwise, return diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryProfileInfo.h new file mode 100644 index 000000000000..1b12e78eaeba --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryProfileInfo.h @@ -0,0 +1,112 @@ +//===- llvm/Analysis/MemoryProfileInfo.h - memory profile info ---*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utilities to analyze memory profile information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYPROFILEINFO_H +#define LLVM_ANALYSIS_MEMORYPROFILEINFO_H + +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include <map> + +namespace llvm { +namespace memprof { + +// Allocation type assigned to an allocation reached by a given context. +// More can be added but initially this is just noncold and cold. +// Values should be powers of two so that they can be ORed, in particular to +// track allocations that have different behavior with different calling +// contexts. +enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 }; + +/// Return the allocation type for a given set of memory profile values. +AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize, + uint64_t MinLifetime); + +/// Build callstack metadata from the provided list of call stack ids. Returns +/// the resulting metadata node. +MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack, LLVMContext &Ctx); + +/// Returns the stack node from an MIB metadata node. 
+MDNode *getMIBStackNode(const MDNode *MIB); + +/// Returns the allocation type from an MIB metadata node. +AllocationType getMIBAllocType(const MDNode *MIB); + +/// Class to build a trie of call stack contexts for a particular profiled +/// allocation call, along with their associated allocation types. +/// The allocation will be at the root of the trie, which is then used to +/// compute the minimum lists of context ids needed to associate a call context +/// with a single allocation type. +class CallStackTrie { +private: + struct CallStackTrieNode { + // Allocation types for call context sharing the context prefix at this + // node. + uint8_t AllocTypes; + // Map of caller stack id to the corresponding child Trie node. + std::map<uint64_t, CallStackTrieNode *> Callers; + CallStackTrieNode(AllocationType Type) + : AllocTypes(static_cast<uint8_t>(Type)) {} + }; + + // The node for the allocation at the root. + CallStackTrieNode *Alloc; + // The allocation's leaf stack id. + uint64_t AllocStackId; + + void deleteTrieNode(CallStackTrieNode *Node) { + if (!Node) + return; + for (auto C : Node->Callers) + deleteTrieNode(C.second); + delete Node; + } + + // Recursive helper to trim contexts and create metadata nodes. + bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + std::vector<Metadata *> &MIBNodes, + bool CalleeHasAmbiguousCallerContext); + +public: + CallStackTrie() : Alloc(nullptr), AllocStackId(0) {} + ~CallStackTrie() { deleteTrieNode(Alloc); } + + bool empty() const { return Alloc == nullptr; } + + /// Add a call stack context with the given allocation type to the Trie. + /// The context is represented by the list of stack ids (computed during + /// matching via a debug location hash), expected to be in order from the + /// allocation call down to the bottom of the call stack (i.e. callee to + /// caller order). 
+ void addCallStack(AllocationType AllocType, ArrayRef<uint64_t> StackIds); + + /// Add the call stack context along with its allocation type from the MIB + /// metadata to the Trie. + void addCallStack(MDNode *MIB); + + /// Build and attach the minimal necessary MIB metadata. If the alloc has a + /// single allocation type, add a function attribute instead. The reason for + /// adding an attribute in this case is that it matches how the behavior for + /// allocation calls will be communicated to lib call simplification after + /// cloning or another optimization to distinguish the allocation types, + /// which is lower overhead and more direct than maintaining this metadata. + /// Returns true if memprof metadata attached, false if not (attribute added). + bool buildAndAttachMIBMetadata(CallBase *CI); +}; + +} // end namespace memprof +} // end namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h index 5b49ab14286b..fd00c744840b 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1374,11 +1374,11 @@ private: /// Expression indicating the least constant maximum backedge-taken count of /// the loop that is known, or a SCEVCouldNotCompute. This expression is /// only valid if the predicates associated with all loop exits are true. - const SCEV *ConstantMax; + const SCEV *ConstantMax = nullptr; /// Indicating if \c ExitNotTaken has an element for every exiting block in /// the loop. - bool IsComplete; + bool IsComplete = false; /// Expression indicating the least maximum backedge-taken count of the loop /// that is known, or a SCEVCouldNotCompute. Lazily computed on first query. 
@@ -1391,7 +1391,7 @@ private: const SCEV *getConstantMax() const { return ConstantMax; } public: - BackedgeTakenInfo() : ConstantMax(nullptr), IsComplete(false) {} + BackedgeTakenInfo() = default; BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h b/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h index 428238c5fa0b..d5805a731475 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/SparsePropagation.h @@ -331,8 +331,8 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors( return; } - if (TI.isExceptionalTerminator() || - TI.isIndirectTerminator()) { + if (!isa<SwitchInst>(TI)) { + // Unknown terminator, assume all successors are feasible. Succs.assign(Succs.size(), true); return; } diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h index c64cb51cc08e..da4410fcac14 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -52,6 +52,7 @@ class LoadInst; class LoopAccessInfo; class Loop; class LoopInfo; +class LoopVectorizationLegality; class ProfileSummaryInfo; class RecurrenceDescriptor; class SCEV; @@ -129,7 +130,8 @@ class IntrinsicCostAttributes { public: IntrinsicCostAttributes( Intrinsic::ID Id, const CallBase &CI, - InstructionCost ScalarCost = InstructionCost::getInvalid()); + InstructionCost ScalarCost = InstructionCost::getInvalid(), + bool TypeBasedOnly = false); IntrinsicCostAttributes( Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, @@ -530,7 +532,7 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, 
DominatorTree *DT, - const LoopAccessInfo *LAI) const; + LoopVectorizationLegality *LVL) const; /// Query the target whether lowering of the llvm.get.active.lane.mask /// intrinsic is supported and how the mask should be used. A return value @@ -1555,10 +1557,12 @@ public: AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) = 0; - virtual bool - preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, - AssumptionCache &AC, TargetLibraryInfo *TLI, - DominatorTree *DT, const LoopAccessInfo *LAI) = 0; + virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, + ScalarEvolution &SE, + AssumptionCache &AC, + TargetLibraryInfo *TLI, + DominatorTree *DT, + LoopVectorizationLegality *LVL) = 0; virtual PredicationStyle emitGetActiveLaneMask() = 0; virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) = 0; @@ -1935,8 +1939,8 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) override { - return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) override { + return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle emitGetActiveLaneMask() override { return Impl.emitGetActiveLaneMask(); diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index af71fc9bffaf..1a75cb35549e 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -163,7 +163,7 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) const { + LoopVectorizationLegality *LVL) const { 
return false; } diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h index 5d23ec5cd911..51e377293e95 100644 --- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -31,6 +31,7 @@ constexpr size_t FileHeaderSize32 = 20; constexpr size_t FileHeaderSize64 = 24; constexpr size_t AuxFileHeaderSize32 = 72; constexpr size_t AuxFileHeaderSize64 = 110; +constexpr size_t AuxFileHeaderSizeShort = 28; constexpr size_t SectionHeaderSize32 = 40; constexpr size_t SectionHeaderSize64 = 72; constexpr size_t SymbolTableEntrySize = 18; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h index d8da3be0cd4c..d43f399b2c31 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -25,6 +25,8 @@ using MachineBasicBlockComparator = void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, MachineBasicBlockComparator MBBCmp); +void avoidZeroOffsetLandingPad(MachineFunction &MF); + } // end namespace llvm #endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 7ae1304cced9..557339548581 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H -#define LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H +#define 
LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" @@ -106,4 +106,4 @@ ImmutablePass * createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf); } // namespace llvm -#endif // LLVM_ANALYSIS_BASICBLOCKSECTIONSINFO_H +#endif // LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h index b5b766ff03f1..c35a9e878613 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -603,8 +603,8 @@ public: bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) { - return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) { + return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle emitGetActiveLaneMask() { diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h index 90afbfc32a4e..5c3776e972c0 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -435,8 +435,8 @@ public: /// Note that, unlike AllocateReg, this shadows ALL of the shadow registers. 
unsigned AllocateStack(unsigned Size, Align Alignment, ArrayRef<MCPhysReg> ShadowRegs) { - for (unsigned i = 0; i < ShadowRegs.size(); ++i) - MarkAllocated(ShadowRegs[i]); + for (MCPhysReg Reg : ShadowRegs) + MarkAllocated(Reg); return AllocateStack(Size, Alignment); } diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 5e7428a5edc5..f7fafdc57401 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -569,6 +569,7 @@ private: /// Current optimization remark emitter. Used to report failures. std::unique_ptr<OptimizationRemarkEmitter> ORE; + AAResults *AA; FunctionLoweringInfo FuncInfo; // True when either the Target Machine specifies no optimizations or the diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 78f1b49da822..31f3d5d84186 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -48,7 +48,6 @@ class TargetRegisterInfo; class TargetRegisterClass; class ConstantFP; class APFloat; -class MachineIRBuilder; // Convenience macros for dealing with vector reduction opcodes. #define GISEL_VECREDUCE_CASES_ALL \ diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h index 14bbcd24d04d..e90730140406 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1292,6 +1292,12 @@ enum NodeType { // Outputs: output chain, glue STACKMAP, + // The `llvm.experimental.patchpoint.*` intrinsic. + // Operands: input chain, [glue], reg-mask, <id>, <numShadowBytes>, callee, + // <numArgs>, cc, ... 
+ // Outputs: [rv], output chain, glue + PATCHPOINT, + // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID, #include "llvm/IR/VPIntrinsics.def" diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h index b832eaa37305..b26aa773c9ea 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -55,8 +55,7 @@ class VirtRegMap; MachineFunction* MF; MachineRegisterInfo* MRI; const TargetRegisterInfo* TRI; - const TargetInstrInfo* TII; - AAResults *AA; + const TargetInstrInfo *TII; SlotIndexes* Indexes; MachineDominatorTree *DomTree = nullptr; LiveIntervalCalc *LICalc = nullptr; @@ -212,10 +211,6 @@ class VirtRegMap; return Indexes; } - AAResults *getAliasAnalysis() const { - return AA; - } - /// Returns true if the specified machine instr has been removed or was /// never entered in the map. bool isNotInMIMap(const MachineInstr &Instr) const { diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h index c6efa7b30d71..3b61563cb598 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -32,7 +32,6 @@ namespace llvm { -class AAResults; class LiveIntervals; class MachineInstr; class MachineOperand; @@ -93,7 +92,7 @@ private: SmallPtrSet<const VNInfo *, 4> Rematted; /// scanRemattable - Identify the Parent values that may rematerialize. - void scanRemattable(AAResults *aa); + void scanRemattable(); /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. @@ -103,8 +102,7 @@ private: SmallPtrSet<LiveInterval *, 8>>; /// Helper for eliminateDeadDefs. 
- void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA); + void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink); /// MachineRegisterInfo callback to notify when new virtual /// registers are created. @@ -184,12 +182,11 @@ public: /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AAResults *); + bool anyRematerializable(); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AAResults *); + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI); /// Remat - Information needed to rematerialize at a specific location. struct Remat { @@ -242,8 +239,7 @@ public: /// allocator. These registers should not be split into new intervals /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<Register> RegsBeingSpilled = None, - AAResults *AA = nullptr); + ArrayRef<Register> RegsBeingSpilled = None); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. 
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h index aa198527415d..03a0517d2642 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LiveVariables.h @@ -219,8 +219,7 @@ public: return false; bool Removed = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isKill() && MO.getReg() == Reg) { MO.setIsKill(false); Removed = true; @@ -255,8 +254,7 @@ public: return false; bool Removed = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h index acc4c9a24c01..5f483a8d0312 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h @@ -572,12 +572,9 @@ public: /// Returns true if the instruction has implicit definition. bool hasImplicitDef() const { - for (unsigned I = getNumExplicitOperands(), E = getNumOperands(); - I != E; ++I) { - const MachineOperand &MO = getOperand(I); + for (const MachineOperand &MO : implicit_operands()) if (MO.isDef() && MO.isImplicit()) return true; - } return false; } @@ -1620,7 +1617,7 @@ public: /// argument area of a function (if it does not change). If the instruction /// does multiple loads, this returns true only if all of the loads are /// dereferenceable and invariant. 
- bool isDereferenceableInvariantLoad(AAResults *AA) const; + bool isDereferenceableInvariantLoad() const; /// If the specified instruction is a PHI that always merges together the /// same virtual register, return the register, otherwise return 0. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h index cdd0073749d3..61240924e5e1 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -41,7 +41,6 @@ namespace llvm { -class BasicBlock; class Function; class LLVMTargetMachine; class MachineFunction; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h index 4559f7a9bde7..fc1cc0a879ca 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -333,9 +333,9 @@ public: NodeSet() = default; NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { Latency = 0; - for (unsigned i = 0, e = Nodes.size(); i < e; ++i) { + for (const SUnit *Node : Nodes) { DenseMap<SUnit *, unsigned> SuccSUnitLatency; - for (const SDep &Succ : Nodes[i]->Succs) { + for (const SDep &Succ : Node->Succs) { auto SuccSUnit = Succ.getSUnit(); if (!Nodes.count(SuccSUnit)) continue; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h index 6e37d42f0d29..9822f8013e91 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/Passes.h @@ -26,7 +26,6 @@ namespace llvm { class FunctionPass; class MachineFunction; class MachineFunctionPass; -class MemoryBuffer; class ModulePass; class Pass; class TargetMachine; diff --git 
a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h index 1f0cd273bf61..52797afbd848 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -146,9 +146,8 @@ public: /// Query whether a frame index is a scavenging frame index. bool isScavengingFrameIndex(int FI) const { - for (SmallVectorImpl<ScavengedInfo>::const_iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) - if (I->FrameIndex == FI) + for (const ScavengedInfo &SI : Scavenged) + if (SI.FrameIndex == FI) return true; return false; @@ -156,10 +155,9 @@ public: /// Get an array of scavenging frame indices. void getScavengingFrameIndices(SmallVectorImpl<int> &A) const { - for (SmallVectorImpl<ScavengedInfo>::const_iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) - if (I->FrameIndex >= 0) - A.push_back(I->FrameIndex); + for (const ScavengedInfo &I : Scavenged) + if (I.FrameIndex >= 0) + A.push_back(I.FrameIndex); } /// Make a register of the specific register class diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAG.h index f1c377f76d02..2fe2aabe833e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -525,9 +525,8 @@ class TargetRegisterInfo; virtual void push(SUnit *U) = 0; void push_all(const std::vector<SUnit *> &Nodes) { - for (std::vector<SUnit *>::const_iterator I = Nodes.begin(), - E = Nodes.end(); I != E; ++I) - push(*I); + for (SUnit *SU : Nodes) + push(SU); } virtual SUnit *pop() = 0; diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h index bcbd7ebcc0c9..1169e0116ec8 100644 --- 
a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1043,13 +1043,15 @@ public: bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo = AAMDNodes()); + const AAMDNodes &AAInfo = AAMDNodes(), + AAResults *AA = nullptr); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 87df6d1b1604..e152503f9e1a 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -321,7 +321,11 @@ private: void Select_FREEZE(SDNode *N); void Select_ARITH_FENCE(SDNode *N); + + void pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue Operand, + SDLoc DL); void Select_STACKMAP(SDNode *N); + void Select_PATCHPOINT(SDNode *N); private: void DoInstructionSelection(); diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h index f9183e0a9c66..a1c9061baee6 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -121,12 +121,11 @@ public: /// This means the only allowed uses are constants and unallocatable physical /// registers so that the instructions result is independent of the place /// in the function. 
- bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA = nullptr) const { + bool isTriviallyReMaterializable(const MachineInstr &MI) const { return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF || (MI.getDesc().isRematerializable() && - (isReallyTriviallyReMaterializable(MI, AA) || - isReallyTriviallyReMaterializableGeneric(MI, AA))); + (isReallyTriviallyReMaterializable(MI) || + isReallyTriviallyReMaterializableGeneric(MI))); } /// Given \p MO is a PhysReg use return if it can be ignored for the purpose @@ -143,8 +142,7 @@ protected: /// than producing a value, or if it requres any address registers that are /// not always available. /// Requirements must be check as stated in isTriviallyReMaterializable() . - virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { + virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const { return false; } @@ -186,8 +184,7 @@ private: /// set and the target hook isReallyTriviallyReMaterializable returns false, /// this function does target-independent tests to determine if the /// instruction is really trivially rematerializable. 
- bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI, - AAResults *AA) const; + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI) const; public: /// These methods return the opcode of the frame setup/destroy instructions diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h index ab5d3ba0164d..1bb2a8e50c07 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3874,7 +3874,7 @@ public: virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// Return true if it is profitable to move this shift by a constant amount - /// though its operand, adjusting any immediate operands as necessary to + /// through its operand, adjusting any immediate operands as necessary to /// preserve semantics. This transformation may not be desirable if it /// disrupts a particularly auspicious target-specific tree (e.g. bitfield /// extraction in AArch64). By default, it returns true. @@ -3886,6 +3886,14 @@ public: return true; } + /// Return true if it is profitable to combine an XOR of a logical shift + /// to create a logical shift of NOT. This transformation may not be desirable + /// if it disrupts a particularly auspicious target-specific tree (e.g. + /// BIC on ARM/AArch64). By default, it returns true. + virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const { + return true; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. 
On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h index b2b2e2e873be..3961100e00e1 100644 --- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -9,6 +9,7 @@ #ifndef LLVM_DWARFLINKER_DWARFLINKER_H #define LLVM_DWARFLINKER_DWARFLINKER_H +#include "llvm/ADT/AddressRanges.h" #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" @@ -37,25 +38,6 @@ enum class DwarfLinkerAccelTableKind : uint8_t { Pub, ///< .debug_pubnames, .debug_pubtypes }; -/// Partial address range. Besides an offset, only the -/// HighPC is stored. The structure is stored in a map where the LowPC is the -/// key. -struct ObjFileAddressRange { - /// Function HighPC. - uint64_t HighPC; - /// Offset to apply to the linked address. - /// should be 0 for not-linked object file. - int64_t Offset; - - ObjFileAddressRange(uint64_t EndPC, int64_t Offset) - : HighPC(EndPC), Offset(Offset) {} - - ObjFileAddressRange() : HighPC(0), Offset(0) {} -}; - -/// Map LowPC to ObjFileAddressRange. -using RangesTy = std::map<uint64_t, ObjFileAddressRange>; - /// AddressesMap represents information about valid addresses used /// by debug information. Valid addresses are those which points to /// live code sections. i.e. relocations for these addresses point @@ -142,7 +124,7 @@ public: /// original \p Entries. 
virtual void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) = 0; diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h index 788275782235..05e291c05132 100644 --- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h @@ -9,8 +9,8 @@ #ifndef LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H #define LLVM_DWARFLINKER_DWARFLINKERCOMPILEUNIT_H +#include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" #include "llvm/CodeGen/DIE.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" @@ -18,12 +18,9 @@ namespace llvm { class DeclContext; -template <typename KeyT, typename ValT> -using HalfOpenIntervalMap = - IntervalMap<KeyT, ValT, IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize, - IntervalMapHalfOpenInfo<KeyT>>; - -using FunctionIntervals = HalfOpenIntervalMap<uint64_t, int64_t>; +/// Mapped value in the address map is the offset to apply to the +/// linked address. +using RangesTy = AddressRangesMap<int64_t>; // FIXME: Delete this structure. 
struct PatchLocation { @@ -84,8 +81,7 @@ public: CompileUnit(DWARFUnit &OrigUnit, unsigned ID, bool CanUseODR, StringRef ClangModuleName) - : OrigUnit(OrigUnit), ID(ID), Ranges(RangeAlloc), - ClangModuleName(ClangModuleName) { + : OrigUnit(OrigUnit), ID(ID), ClangModuleName(ClangModuleName) { Info.resize(OrigUnit.getNumDIEs()); auto CUDie = OrigUnit.getUnitDIE(false); @@ -143,7 +139,7 @@ public: return UnitRangeAttribute; } - const FunctionIntervals &getFunctionRanges() const { return Ranges; } + const RangesTy &getFunctionRanges() const { return Ranges; } const std::vector<PatchLocation> &getRangesAttributes() const { return RangeAttributes; @@ -266,12 +262,10 @@ private: std::tuple<DIE *, const CompileUnit *, DeclContext *, PatchLocation>> ForwardDIEReferences; - FunctionIntervals::Allocator RangeAlloc; - - /// The ranges in that interval map are the PC ranges for - /// functions in this unit, associated with the PC offset to apply - /// to the addresses to get the linked address. - FunctionIntervals Ranges; + /// The ranges in that map are the PC ranges for functions in this unit, + /// associated with the PC offset to apply to the addresses to get + /// the linked address. + RangesTy Ranges; /// The DW_AT_low_pc of each DW_TAG_label. SmallDenseMap<uint64_t, uint64_t, 1> Labels; diff --git a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index 003fe548252a..0ccab0efa8f4 100644 --- a/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/contrib/llvm-project/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -96,7 +96,7 @@ public: /// original \p Entries. 
void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) override; diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 4cee3abdde87..5d537755b2d6 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -523,10 +523,12 @@ CV_REGISTER(ARM_NQ15, 415) #if defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) -// arm64intr.h from MSVC defines ARM64_FPSR, which conflicts with +// arm64intr.h from MSVC defines ARM64_FPSR and ARM64_FPCR, which conflicts with // these declarations. #pragma push_macro("ARM64_FPSR") +#pragma push_macro("ARM64_FPCR") #undef ARM64_FPSR +#undef ARM64_FPCR // ARM64 registers @@ -715,7 +717,79 @@ CV_REGISTER(ARM64_Q31, 211) // Floating point status register CV_REGISTER(ARM64_FPSR, 220) +CV_REGISTER(ARM64_FPCR, 221) + +// 8 bit floating point registers + +CV_REGISTER(ARM64_B0, 230) +CV_REGISTER(ARM64_B1, 231) +CV_REGISTER(ARM64_B2, 232) +CV_REGISTER(ARM64_B3, 233) +CV_REGISTER(ARM64_B4, 234) +CV_REGISTER(ARM64_B5, 235) +CV_REGISTER(ARM64_B6, 236) +CV_REGISTER(ARM64_B7, 237) +CV_REGISTER(ARM64_B8, 238) +CV_REGISTER(ARM64_B9, 239) +CV_REGISTER(ARM64_B10, 240) +CV_REGISTER(ARM64_B11, 241) +CV_REGISTER(ARM64_B12, 242) +CV_REGISTER(ARM64_B13, 243) +CV_REGISTER(ARM64_B14, 244) +CV_REGISTER(ARM64_B15, 245) +CV_REGISTER(ARM64_B16, 246) +CV_REGISTER(ARM64_B17, 247) +CV_REGISTER(ARM64_B18, 248) +CV_REGISTER(ARM64_B19, 249) +CV_REGISTER(ARM64_B20, 250) +CV_REGISTER(ARM64_B21, 251) +CV_REGISTER(ARM64_B22, 252) +CV_REGISTER(ARM64_B23, 253) +CV_REGISTER(ARM64_B24, 254) +CV_REGISTER(ARM64_B25, 255) 
+CV_REGISTER(ARM64_B26, 256) +CV_REGISTER(ARM64_B27, 257) +CV_REGISTER(ARM64_B28, 258) +CV_REGISTER(ARM64_B29, 259) +CV_REGISTER(ARM64_B30, 260) +CV_REGISTER(ARM64_B31, 261) + +// 16 bit floating point registers + +CV_REGISTER(ARM64_H0, 270) +CV_REGISTER(ARM64_H1, 271) +CV_REGISTER(ARM64_H2, 272) +CV_REGISTER(ARM64_H3, 273) +CV_REGISTER(ARM64_H4, 274) +CV_REGISTER(ARM64_H5, 275) +CV_REGISTER(ARM64_H6, 276) +CV_REGISTER(ARM64_H7, 277) +CV_REGISTER(ARM64_H8, 278) +CV_REGISTER(ARM64_H9, 279) +CV_REGISTER(ARM64_H10, 280) +CV_REGISTER(ARM64_H11, 281) +CV_REGISTER(ARM64_H12, 282) +CV_REGISTER(ARM64_H13, 283) +CV_REGISTER(ARM64_H14, 284) +CV_REGISTER(ARM64_H15, 285) +CV_REGISTER(ARM64_H16, 286) +CV_REGISTER(ARM64_H17, 287) +CV_REGISTER(ARM64_H18, 288) +CV_REGISTER(ARM64_H19, 289) +CV_REGISTER(ARM64_H20, 290) +CV_REGISTER(ARM64_H21, 291) +CV_REGISTER(ARM64_H22, 292) +CV_REGISTER(ARM64_H23, 293) +CV_REGISTER(ARM64_H24, 294) +CV_REGISTER(ARM64_H25, 295) +CV_REGISTER(ARM64_H26, 296) +CV_REGISTER(ARM64_H27, 297) +CV_REGISTER(ARM64_H28, 298) +CV_REGISTER(ARM64_H29, 299) +CV_REGISTER(ARM64_H30, 300) +CV_REGISTER(ARM64_H31, 301) #pragma pop_macro("ARM64_FPSR") +#pragma pop_macro("ARM64_FPCR") #endif // defined(CV_REGISTERS_ALL) || defined(CV_REGISTERS_ARM64) diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Markup.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Markup.h index 2628b47cf6d3..4f2b0de481ec 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Markup.h +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/Markup.h @@ -84,6 +84,10 @@ public: /// \returns the next markup node or None if none remain. 
Optional<MarkupNode> nextNode(); + bool isSGR(const MarkupNode &Node) const { + return SGRSyntax.match(Node.Text); + } + private: Optional<MarkupNode> parseElement(StringRef Line); void parseTextOutsideMarkup(StringRef Text); diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h index b7d70ccafe66..26686143af95 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -17,6 +17,9 @@ #include "Markup.h" +#include <map> + +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -29,45 +32,106 @@ class MarkupFilter { public: MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled = llvm::None); - /// Begins a logical \p Line of markup. - /// - /// This must be called for each line of the input stream before calls to - /// filter() for elements of that line. The provided \p Line must be the same - /// one that was passed to parseLine() to produce the elements to be later - /// passed to filter(). + /// Filters a line containing symbolizer markup and writes the human-readable + /// results to the output stream. /// - /// This informs the filter that a new line is beginning and establishes a - /// context for error location reporting. - void beginLine(StringRef Line); + /// Invalid or unimplemented markup elements are removed. Some output may be + /// deferred until future filter() or finish() call. + void filter(StringRef Line); - /// Handle a \p Node of symbolizer markup. - /// - /// If the node is a recognized, valid markup element, it is replaced with a - /// human-readable string. If the node isn't an element or the element isn't - /// recognized, it is output verbatim. If the element is recognized but isn't - /// valid, it is omitted from the output. 
- void filter(const MarkupNode &Node); + /// Records that the input stream has ended and writes any deferred output. + void finish(); private: + struct Module { + uint64_t ID; + std::string Name; + SmallVector<uint8_t> BuildID; + }; + + struct MMap { + uint64_t Addr; + uint64_t Size; + const Module *Mod; + std::string Mode; // Lowercase + uint64_t ModuleRelativeAddr; + + bool contains(uint64_t Addr) const; + }; + + // An informational module line currently being constructed. As many mmap + // elements as possible are folded into one ModuleInfo line. + struct ModuleInfoLine { + const Module *Mod; + + SmallVector<const MMap *> MMaps = {}; + }; + + bool tryContextualElement(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryMMap(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryReset(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + bool tryModule(const MarkupNode &Element, + const SmallVector<MarkupNode> &DeferredNodes); + + void beginModuleInfoLine(const Module *M); + void endAnyModuleInfoLine(); + + void filterNode(const MarkupNode &Node); + + bool tryPresentation(const MarkupNode &Node); + bool trySymbol(const MarkupNode &Node); + bool trySGR(const MarkupNode &Node); void highlight(); + void highlightValue(); void restoreColor(); void resetColor(); + Optional<Module> parseModule(const MarkupNode &Element) const; + Optional<MMap> parseMMap(const MarkupNode &Element) const; + + Optional<uint64_t> parseAddr(StringRef Str) const; + Optional<uint64_t> parseModuleID(StringRef Str) const; + Optional<uint64_t> parseSize(StringRef Str) const; + Optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const; + Optional<std::string> parseMode(StringRef Str) const; + bool checkTag(const MarkupNode &Node) const; - bool checkNumFields(const MarkupNode &Node, size_t Size) const; + bool checkNumFields(const MarkupNode &Element, size_t Size) const; + bool checkNumFieldsAtLeast(const 
MarkupNode &Element, size_t Size) const; void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; + const MMap *overlappingMMap(const MMap &Map) const; + + StringRef lineEnding() const; + raw_ostream &OS; const bool ColorsEnabled; + MarkupParser Parser; + + // Current line being filtered. StringRef Line; + // A module info line currently being built. This incorporates as much mmap + // information as possible before being emitted. + Optional<ModuleInfoLine> MIL; + + // SGR state. Optional<raw_ostream::Colors> Color; bool Bold = false; + + // Map from Module ID to Module. + DenseMap<uint64_t, std::unique_ptr<Module>> Modules; + + // Ordered map from starting address to mmap. + std::map<uint64_t, MMap> MMaps; }; } // end namespace symbolize diff --git a/contrib/llvm-project/llvm/include/llvm/Debuginfod/HTTPServer.h b/contrib/llvm-project/llvm/include/llvm/Debuginfod/HTTPServer.h index 410ba32b3f2e..15e611ec546f 100644 --- a/contrib/llvm-project/llvm/include/llvm/Debuginfod/HTTPServer.h +++ b/contrib/llvm-project/llvm/include/llvm/Debuginfod/HTTPServer.h @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_HTTP_SERVER_H -#define LLVM_SUPPORT_HTTP_SERVER_H +#ifndef LLVM_DEBUGINFOD_HTTPSERVER_H +#define LLVM_DEBUGINFOD_HTTPSERVER_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -120,4 +120,4 @@ public: }; } // end namespace llvm -#endif // LLVM_SUPPORT_HTTP_SERVER_H +#endif // LLVM_DEBUGINFOD_HTTPSERVER_H diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h index 959632f13e1e..6d4f6222af44 100644 --- a/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/contrib/llvm-project/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -13,8 +13,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef DEMANGLE_ITANIUMDEMANGLE_H -#define DEMANGLE_ITANIUMDEMANGLE_H +#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H +#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H #include "DemangleConfig.h" #include "StringView.h" @@ -5477,4 +5477,4 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> { DEMANGLE_NAMESPACE_END -#endif // DEMANGLE_ITANIUMDEMANGLE_H +#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/StringView.h b/contrib/llvm-project/llvm/include/llvm/Demangle/StringView.h index 6bbb8837fed1..30580af282fb 100644 --- a/contrib/llvm-project/llvm/include/llvm/Demangle/StringView.h +++ b/contrib/llvm-project/llvm/include/llvm/Demangle/StringView.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_STRINGVIEW_H -#define DEMANGLE_STRINGVIEW_H +#ifndef LLVM_DEMANGLE_STRINGVIEW_H +#define LLVM_DEMANGLE_STRINGVIEW_H #include "DemangleConfig.h" #include <cassert> diff --git a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h index ca7e44b948c7..691c34067d7f 100644 --- a/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h +++ b/contrib/llvm-project/llvm/include/llvm/Demangle/Utility.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef DEMANGLE_UTILITY_H -#define DEMANGLE_UTILITY_H +#ifndef LLVM_DEMANGLE_UTILITY_H +#define LLVM_DEMANGLE_UTILITY_H #include "StringView.h" #include <array> diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h new file mode 100644 index 000000000000..37d75bfff546 --- /dev/null +++ 
b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h @@ -0,0 +1,56 @@ +//===--------------- MapperJITLinkMemoryManager.h -*- C++ -*---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements JITLinkMemoryManager using MemoryMapper +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/MemoryMapper.h" + +namespace llvm { +namespace orc { + +class MapperJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +public: + MapperJITLinkMemoryManager(std::unique_ptr<MemoryMapper> Mapper); + + template <class MemoryMapperType, class... 
Args> + static Expected<std::unique_ptr<MapperJITLinkMemoryManager>> + CreateWithMapper(Args &&...A) { + auto Mapper = MemoryMapperType::Create(std::forward<Args>(A)...); + if (!Mapper) + return Mapper.takeError(); + + return std::make_unique<MapperJITLinkMemoryManager>(std::move(*Mapper)); + } + + void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + // synchronous overload + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector<FinalizedAlloc> Allocs, + OnDeallocatedFunction OnDeallocated) override; + // synchronous overload + using JITLinkMemoryManager::deallocate; + +private: + class InFlightAlloc; + + std::unique_ptr<MemoryMapper> Mapper; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_MAPPERJITLINKMEMORYMANAGER_H diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h index d023bfbdb5b6..0b4cda119cad 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/MemoryMapper.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_MEMORYMAPPER_H #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/Support/Process.h" #include <mutex> @@ -41,6 +42,9 @@ public: using OnReservedFunction = unique_function<void(Expected<ExecutorAddrRange>)>; + // Page size of the target process + virtual unsigned int getPageSize() = 0; + /// Reserves address space in executor process virtual void reserve(size_t NumBytes, OnReservedFunction OnReserved) = 0; @@ -76,7 +80,11 @@ public: class InProcessMemoryMapper final : public MemoryMapper { public: - InProcessMemoryMapper() {} + InProcessMemoryMapper(size_t PageSize); + + static Expected<std::unique_ptr<InProcessMemoryMapper>> Create(); + + unsigned int getPageSize() override { return PageSize; } void reserve(size_t 
NumBytes, OnReservedFunction OnReserved) override; @@ -107,6 +115,56 @@ private: std::mutex Mutex; ReservationMap Reservations; AllocationMap Allocations; + + size_t PageSize; +}; + +class SharedMemoryMapper final : public MemoryMapper { +public: + struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Reserve; + ExecutorAddr Initialize; + ExecutorAddr Deinitialize; + ExecutorAddr Release; + }; + + SharedMemoryMapper(ExecutorProcessControl &EPC, SymbolAddrs SAs, + size_t PageSize); + + static Expected<std::unique_ptr<SharedMemoryMapper>> + Create(ExecutorProcessControl &EPC, SymbolAddrs SAs); + + unsigned int getPageSize() override { return PageSize; } + + void reserve(size_t NumBytes, OnReservedFunction OnReserved) override; + + char *prepare(ExecutorAddr Addr, size_t ContentSize) override; + + void initialize(AllocInfo &AI, OnInitializedFunction OnInitialized) override; + + void deinitialize(ArrayRef<ExecutorAddr> Allocations, + OnDeinitializedFunction OnDeInitialized) override; + + void release(ArrayRef<ExecutorAddr> Reservations, + OnReleasedFunction OnRelease) override; + + ~SharedMemoryMapper() override; + +private: + struct Reservation { + void *LocalAddr; + size_t Size; + }; + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; + + std::mutex Mutex; + + std::map<ExecutorAddr, Reservation> Reservations; + + size_t PageSize; }; } // namespace orc diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h index 96166ac20b2e..2aedf1e44ad8 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -31,6 +31,12 @@ extern const char *SimpleExecutorMemoryManagerReserveWrapperName; extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName; extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName; +extern 
const char *ExecutorSharedMemoryMapperServiceInstanceName; +extern const char *ExecutorSharedMemoryMapperServiceReserveWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceInitializeWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceDeinitializeWrapperName; +extern const char *ExecutorSharedMemoryMapperServiceReleaseWrapperName; + extern const char *MemoryWriteUInt8sWrapperName; extern const char *MemoryWriteUInt16sWrapperName; extern const char *MemoryWriteUInt32sWrapperName; @@ -58,6 +64,21 @@ using SPSSimpleExecutorMemoryManagerFinalizeSignature = using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError( shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>); +// ExecutorSharedMemoryMapperService +using SPSExecutorSharedMemoryMapperServiceReserveSignature = + shared::SPSExpected< + shared::SPSTuple<shared::SPSExecutorAddr, shared::SPSString>>( + shared::SPSExecutorAddr, uint64_t); +using SPSExecutorSharedMemoryMapperServiceInitializeSignature = + shared::SPSExpected<shared::SPSExecutorAddr>( + shared::SPSExecutorAddr, shared::SPSExecutorAddr, + shared::SPSSharedMemoryFinalizeRequest); +using SPSExecutorSharedMemoryMapperServiceDeinitializeSignature = + shared::SPSError(shared::SPSExecutorAddr, + shared::SPSSequence<shared::SPSExecutorAddr>); +using SPSExecutorSharedMemoryMapperServiceReleaseSignature = shared::SPSError( + shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>); + using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSString>); diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h index 9be58e9f0fa9..c38825948208 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h +++ 
b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h @@ -56,6 +56,7 @@ public: SPSOutputBuffer(char *Buffer, size_t Remaining) : Buffer(Buffer), Remaining(Remaining) {} bool write(const char *Data, size_t Size) { + assert(Data && "Data must not be null"); if (Size > Remaining) return false; memcpy(Buffer, Data, Size); @@ -349,6 +350,8 @@ public: static bool serialize(SPSOutputBuffer &OB, const ArrayRef<char> &A) { if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(A.size()))) return false; + if (A.empty()) // Empty ArrayRef may have null data, so bail out early. + return true; return OB.write(A.data(), A.size()); } @@ -358,7 +361,7 @@ public: return false; if (Size > std::numeric_limits<size_t>::max()) return false; - A = {IB.data(), static_cast<size_t>(Size)}; + A = {Size ? IB.data() : nullptr, static_cast<size_t>(Size)}; return IB.skip(Size); } }; @@ -476,6 +479,8 @@ public: static bool serialize(SPSOutputBuffer &OB, StringRef S) { if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(S.size()))) return false; + if (S.empty()) // Empty StringRef may have null data, so bail out early. + return true; return OB.write(S.data(), S.size()); } @@ -487,7 +492,7 @@ public: Data = IB.data(); if (!IB.skip(Size)) return false; - S = StringRef(Data, Size); + S = StringRef(Size ? 
Data : nullptr, Size); return true; } }; diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h index d596a89a50b6..502c7c1f7069 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -82,6 +82,17 @@ struct FinalizeRequest { shared::AllocActions Actions; }; +struct SharedMemorySegFinalizeRequest { + WireProtectionFlags Prot; + ExecutorAddr Addr; + uint64_t Size; +}; + +struct SharedMemoryFinalizeRequest { + std::vector<SharedMemorySegFinalizeRequest> Segments; + shared::AllocActions Actions; +}; + template <typename T> struct UIntWrite { UIntWrite() = default; UIntWrite(ExecutorAddr Addr, T Value) : Addr(Addr), Value(Value) {} @@ -131,6 +142,13 @@ using SPSSegFinalizeRequest = using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>, SPSSequence<SPSAllocActionCallPair>>; +using SPSSharedMemorySegFinalizeRequest = + SPSTuple<SPSMemoryProtectionFlags, SPSExecutorAddr, uint64_t>; + +using SPSSharedMemoryFinalizeRequest = + SPSTuple<SPSSequence<SPSSharedMemorySegFinalizeRequest>, + SPSSequence<SPSAllocActionCallPair>>; + template <typename T> using SPSMemoryAccessUIntWrite = SPSTuple<SPSExecutorAddr, T>; @@ -204,6 +222,48 @@ public: } }; +template <> +class SPSSerializationTraits<SPSSharedMemorySegFinalizeRequest, + tpctypes::SharedMemorySegFinalizeRequest> { + using SFRAL = SPSSharedMemorySegFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::size(SFR.Prot, SFR.Addr, SFR.Size); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::serialize(OB, SFR.Prot, SFR.Addr, SFR.Size); + } + + static bool 
deserialize(SPSInputBuffer &IB, + tpctypes::SharedMemorySegFinalizeRequest &SFR) { + return SFRAL::deserialize(IB, SFR.Prot, SFR.Addr, SFR.Size); + } +}; + +template <> +class SPSSerializationTraits<SPSSharedMemoryFinalizeRequest, + tpctypes::SharedMemoryFinalizeRequest> { + using FRAL = SPSSharedMemoryFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::size(FR.Segments, FR.Actions); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::serialize(OB, FR.Segments, FR.Actions); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::SharedMemoryFinalizeRequest &FR) { + return FRAL::deserialize(IB, FR.Segments, FR.Actions); + } +}; + template <typename T> class SPSSerializationTraits<SPSMemoryAccessUIntWrite<T>, tpctypes::UIntWrite<T>> { @@ -244,7 +304,6 @@ public: } }; - } // end namespace shared } // end namespace orc } // end namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h new file mode 100644 index 000000000000..69d8cf5d2980 --- /dev/null +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h @@ -0,0 +1,78 @@ +//===----------- ExecutorSharedMemoryMapperService.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h" + +#include <atomic> +#include <mutex> + +#if defined(_WIN32) +#include <windows.h> +#endif + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +class ExecutorSharedMemoryMapperService final + : public ExecutorBootstrapService { +public: + ~ExecutorSharedMemoryMapperService(){}; + + Expected<std::pair<ExecutorAddr, std::string>> reserve(uint64_t Size); + Expected<ExecutorAddr> initialize(ExecutorAddr Reservation, + tpctypes::SharedMemoryFinalizeRequest &FR); + + Error deinitialize(const std::vector<ExecutorAddr> &Bases); + Error release(const std::vector<ExecutorAddr> &Bases); + + Error shutdown() override; + void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override; + +private: + struct Allocation { + std::vector<shared::WrapperFunctionCall> DeinitializationActions; + }; + using AllocationMap = DenseMap<ExecutorAddr, Allocation>; + + struct Reservation { + size_t Size; + std::vector<ExecutorAddr> Allocations; +#if defined(_WIN32) + HANDLE SharedMemoryFile; +#endif + }; + using ReservationMap = DenseMap<void *, Reservation>; + + static llvm::orc::shared::CWrapperFunctionResult + reserveWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + initializeWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + deinitializeWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + releaseWrapper(const char *ArgData, size_t ArgSize); + + 
std::atomic<int> SharedMemoryCount{0}; + std::mutex Mutex; + ReservationMap Reservations; + AllocationMap Allocations; +}; + +} // namespace rt_bootstrap +} // namespace orc +} // namespace llvm +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORSHAREDMEMORYMAPPERSERVICE diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index e40f40f74c73..4269a966a988 100644 --- a/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/contrib/llvm-project/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -74,6 +74,9 @@ class Clause<string c> { // Define an alternative name return in get<LanguageName>ClauseName function. string alternativeName = ""; + // Define aliases used in the parser. + list<string> aliases = []; + // Optional class holding value of the clause in clang AST. string clangClass = ""; @@ -88,6 +91,7 @@ class Clause<string c> { // List of allowed clause values list<ClauseVal> allowedClauseValues = []; + // If set to true, value class is part of a list. Single class by default. bit isValueList = false; @@ -101,6 +105,14 @@ class Clause<string c> { // Set clause used by default when unknown. Function returning the kind // of enumeration will use this clause as the default. bit isDefault = false; + + // Prefix before the actual value. Used in the parser generation. + // `clause(prefix: value)` + string prefix = ""; + + // Set the prefix as optional. + // `clause([prefix]: value)` + bit isPrefixOptional = true; } // Hold information about clause validity by version. 
diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td index 45d815894454..e5f0632f59f5 100644 --- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -62,20 +62,24 @@ def ACCC_Collapse : Clause<"collapse"> { // 2.7.6 def ACCC_Copy : Clause<"copy"> { let flangClass = "AccObjectList"; + let aliases = ["present_or_copy", "pcopy"]; } // 2.7.7 def ACCC_Copyin : Clause<"copyin"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_copyin", "pcopyin"]; } // 2.7.8 def ACCC_Copyout : Clause<"copyout"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_copyout", "pcopyout"]; } // 2.7.9 def ACCC_Create : Clause<"create"> { let flangClass = "AccObjectListWithModifier"; + let aliases = ["present_or_create", "pcreate"]; } // 2.5.15 @@ -130,6 +134,7 @@ def ACCC_DeviceResident : Clause<"device_resident"> { def ACCC_DeviceType : Clause<"device_type"> { let flangClass = "AccDeviceTypeExprList"; let defaultValue = "*"; + let aliases = ["dtype"]; } // 2.6.6 @@ -226,6 +231,7 @@ def ACCC_Seq : Clause<"seq"> {} def ACCC_Vector : Clause<"vector"> { let flangClass = "ScalarIntExpr"; let isValueOptional = true; + let prefix = "length"; } // 2.5.11 @@ -243,6 +249,7 @@ def ACCC_Wait : Clause<"wait"> { def ACCC_Worker: Clause<"worker"> { let flangClass = "ScalarIntExpr"; let isValueOptional = true; + let prefix = "num"; } // 2.12 diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 3dfcabffb58a..e4f2fcc649fc 100644 --- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -630,6 +630,15 @@ public: InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied = true, Value 
*Final = nullptr); + /// Generator for the taskgroup construct + /// + /// \param Loc The location where the taskgroup construct was encountered. + /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param BodyGenCB Callback that will generate the region code. + InsertPointTy createTaskgroup(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB); + /// Functions used to generate reductions. Such functions take two Values /// representing LHS and RHS of the reduction, respectively, and a reference /// to the value that is updated to refer to the reduction result. diff --git a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 9d1ab57729b7..7a70af9ce792 100644 --- a/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/contrib/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -37,6 +37,7 @@ __OMP_TYPE(Int8Ptr) __OMP_TYPE(Int16Ptr) __OMP_TYPE(Int32Ptr) __OMP_TYPE(Int64Ptr) +__OMP_TYPE(Double) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) @@ -236,6 +237,7 @@ __OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr) __OMP_RTL(omp_get_place_num, false, Int32, ) __OMP_RTL(omp_get_partition_num_places, false, Int32, ) __OMP_RTL(omp_get_partition_place_nums, false, Void, Int32Ptr) +__OMP_RTL(omp_get_wtime, false, Double,) __OMP_RTL(omp_set_num_threads, false, Void, Int32) __OMP_RTL(omp_set_dynamic, false, Void, Int32) @@ -681,6 +683,7 @@ __OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_wtime, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) @@ -919,7 +922,7 @@ 
__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_alloc_shared, AttributeSet( EnumAttr(NoUnwind), - EnumAttr(NoSync), + EnumAttr(NoSync), AllocSizeAttr(0, None)), ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(NoCaptureAttrs)) diff --git a/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def b/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def index 1d24f527df7b..c7cb59b13050 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/contrib/llvm-project/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -45,3 +45,5 @@ LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) LLVM_FIXED_MD_KIND(MD_nosanitize, "nosanitize", 31) LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 32) LLVM_FIXED_MD_KIND(MD_exclude, "exclude", 33) +LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) +LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) diff --git a/contrib/llvm-project/llvm/include/llvm/IR/GlobalIFunc.h b/contrib/llvm-project/llvm/include/llvm/IR/GlobalIFunc.h index 976772b343fd..c148ee790778 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/GlobalIFunc.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/GlobalIFunc.h @@ -93,6 +93,12 @@ public: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalIFuncVal; } + + // Apply specific operation to all resolver-related values. If resolver target + // is already a global object, then apply the operation to it directly. If + // target is a GlobalExpr or a GlobalAlias, evaluate it to its base object and + // apply the operation for the base object and all aliases along the path. 
+ void applyAlongResolverPath(function_ref<void(const GlobalValue &)> Op) const; }; template <> diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h index cec26e966b5c..0d3ffba955a3 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/IRBuilder.h @@ -974,7 +974,7 @@ public: /// This is a convenience function for code that uses aggregate return values /// as a vehicle for having multiple return values. ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) { - Value *V = UndefValue::get(getCurrentFunctionReturnType()); + Value *V = PoisonValue::get(getCurrentFunctionReturnType()); for (unsigned i = 0; i != N; ++i) V = CreateInsertValue(V, retVals[i], i, "mrv"); return Insert(ReturnInst::Create(Context, V)); diff --git a/contrib/llvm-project/llvm/include/llvm/IR/InlineAsm.h b/contrib/llvm-project/llvm/include/llvm/IR/InlineAsm.h index 032a70efdceb..0a8d27aad58a 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/InlineAsm.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/InlineAsm.h @@ -92,7 +92,8 @@ public: enum ConstraintPrefix { isInput, // 'x' isOutput, // '=x' - isClobber // '~x' + isClobber, // '~x' + isLabel, // '!x' }; using ConstraintCodeVector = std::vector<std::string>; @@ -117,7 +118,7 @@ public: using ConstraintInfoVector = std::vector<ConstraintInfo>; struct ConstraintInfo { - /// Type - The basic type of the constraint: input/output/clobber + /// Type - The basic type of the constraint: input/output/clobber/label /// ConstraintPrefix Type = isInput; diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h index 8d0a8363cdfb..15b0bdf557fb 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Instruction.h @@ -172,10 +172,6 @@ public: /// its operands. 
bool isOnlyUserOfAnyOperand(); - bool isIndirectTerminator() const { - return isIndirectTerminator(getOpcode()); - } - static const char* getOpcodeName(unsigned OpCode); static inline bool isTerminator(unsigned OpCode) { @@ -242,17 +238,6 @@ public: } } - /// Returns true if the OpCode is a terminator with indirect targets. - static inline bool isIndirectTerminator(unsigned OpCode) { - switch (OpCode) { - case Instruction::IndirectBr: - case Instruction::CallBr: - return true; - default: - return false; - } - } - //===--------------------------------------------------------------------===// // Metadata manipulation. //===--------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h index a14bc39cea65..083fed5de4a3 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Instructions.h @@ -44,6 +44,7 @@ namespace llvm { class APFloat; class APInt; class BasicBlock; +class BlockAddress; class ConstantInt; class DataLayout; class StringRef; @@ -4004,9 +4005,6 @@ class CallBrInst : public CallBase { ArrayRef<BasicBlock *> IndirectDests, ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr); - /// Should the Indirect Destinations change, scan + update the Arg list. - void updateArgBlockAddresses(unsigned i, BasicBlock *B); - /// Compute the number of operands to allocate. 
static int ComputeNumOperands(int NumArgs, int NumIndirectDests, int NumBundleInputs = 0) { @@ -4154,7 +4152,6 @@ public: *(&Op<-1>() - getNumIndirectDests() - 1) = reinterpret_cast<Value *>(B); } void setIndirectDest(unsigned i, BasicBlock *B) { - updateArgBlockAddresses(i, B); *(&Op<-1>() - getNumIndirectDests() + i) = reinterpret_cast<Value *>(B); } @@ -4172,6 +4169,8 @@ public: unsigned getNumSuccessors() const { return getNumIndirectDests() + 1; } + BlockAddress *getBlockAddressForIndirectDest(unsigned DestNo) const; + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::CallBr); diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h index 06d2335821d3..fc9111a4f512 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicInst.h @@ -1381,7 +1381,7 @@ public: } /// The statepoint with which this gc.relocate is associated. - const GCStatepointInst *getStatepoint() const; + const Value *getStatepoint() const; }; /// Represents calls to the gc.relocate intrinsic. diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td index 8bf8e9ca76ad..c523e3773de4 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/Intrinsics.td @@ -495,7 +495,7 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], // Returns the location of the Swift asynchronous context (usually stored just // before the frame pointer), and triggers the creation of a null context if it // would otherwise be unneeded. 
-def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], [IntrNoMem]>; +def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], []>; //===--------------------- Code Generator Intrinsics ----------------------===// // @@ -619,6 +619,7 @@ def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, @@ -631,7 +632,7 @@ def int_memcpy : Intrinsic<[], def int_memcpy_inline : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + [IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, @@ -641,6 +642,7 @@ def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + IntrNoCallback, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>; @@ -648,7 +650,7 @@ def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, - IntrNoFree, + IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, ImmArg<ArgIndex<3>>]>; @@ -659,7 +661,7 @@ def int_memset : Intrinsic<[], def int_memset_inline : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], - [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; @@ -1963,11 +1965,11 @@ def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; 
//===---------- Intrinsics to perform subvector insertion/extraction ------===// def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>; def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<ArgIndex<1>>]>; + [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>; //===----------------- Pointer Authentication Intrinsics ------------------===// // diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e81224d9b890..93925a84c8e8 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -167,6 +167,10 @@ def int_amdgcn_dispatch_id : ClangBuiltin<"__builtin_amdgcn_dispatch_id">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// For internal use. Coordinates LDS lowering between IR transform and backend. 
+def int_amdgcn_lds_kernel_id : + Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_amdgcn_implicit_buffer_ptr : ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [], @@ -2304,6 +2308,17 @@ def int_amdgcn_mfma_i32_32x32x16_i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, ll def int_amdgcn_mfma_f32_16x16x8_xf32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2f32_ty>; def int_amdgcn_mfma_f32_32x32x4_xf32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2f32_ty>; +class AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> : + AMDGPUMfmaIntrinsic<DestTy, llvm_i64_ty>; + +multiclass AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> { + foreach kind = ["bf8_bf8", "bf8_fp8", "fp8_bf8", "fp8_fp8"] in + def NAME#"_"#kind : AMDGPUMFp8MfmaIntrinsic<DestTy>; +} + +defm int_amdgcn_mfma_f32_16x16x32 : AMDGPUMFp8MfmaIntrinsic<llvm_v4f32_ty>; +defm int_amdgcn_mfma_f32_32x32x16 : AMDGPUMFp8MfmaIntrinsic<llvm_v16f32_ty>; + // llvm.amdgcn.smfmac.?32.* vdst, srcA, srcB, srcC, index, cbsz, abid class AMDGPUMSmfmacIntrinsic<LLVMType DestTy, LLVMType SrcA, LLVMType SrcB> : ClangBuiltin<!subst("int", "__builtin", NAME)>, @@ -2320,6 +2335,69 @@ def int_amdgcn_smfmac_f32_32x32x16_bf16 : AMDGPUMSmfmacIntrinsic<llvm_v16f32_ty, def int_amdgcn_smfmac_i32_16x16x64_i8 : AMDGPUMSmfmacIntrinsic<llvm_v4i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>; def int_amdgcn_smfmac_i32_32x32x32_i8 : AMDGPUMSmfmacIntrinsic<llvm_v16i32_ty, llvm_v2i32_ty, llvm_v4i32_ty>; +class AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> : + AMDGPUMSmfmacIntrinsic<DestTy, llvm_v2i32_ty, llvm_v4i32_ty>; + +multiclass AMDGPUMFp8SmfmacIntrinsic<LLVMType DestTy> { + foreach kind = ["bf8_bf8", "bf8_fp8", "fp8_bf8", "fp8_fp8"] in + def NAME#"_"#kind : AMDGPUMFp8SmfmacIntrinsic<DestTy>; +} + +defm int_amdgcn_smfmac_f32_16x16x64 : AMDGPUMFp8SmfmacIntrinsic<llvm_v4f32_ty>; +defm int_amdgcn_smfmac_f32_32x32x32 : AMDGPUMFp8SmfmacIntrinsic<llvm_v16f32_ty>; + +// llvm.amdgcn.cvt.f32.bf8 float vdst, int 
srcA, imm byte_sel [0..3] +// byte_sel selects byte from srcA. +def int_amdgcn_cvt_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_bf8">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.f32.fp8 float vdst, int srcA, imm byte_sel [0..3] +def int_amdgcn_cvt_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_f32_fp8">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.f32.bf8 float2 vdst, int srcA, imm word_sel +// word_sel = 1 selects 2 high bytes, 0 selects 2 low bytes. +def int_amdgcn_cvt_pk_f32_bf8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_bf8">, + Intrinsic<[llvm_v2f32_ty], + [llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.f32.fp8 float2 vdst, int srcA, imm word_sel. +def int_amdgcn_cvt_pk_f32_fp8 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_f32_fp8">, + Intrinsic<[llvm_v2f32_ty], + [llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +// llvm.amdgcn.cvt.pk.bf8.f32 int vdst, float srcA, float srcB, int old, imm word_sel +// word_sel = 1 selects 2 high bytes in the vdst, 0 selects 2 low bytes. +def int_amdgcn_cvt_pk_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_bf8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.pk.fp8.f32 int vdst, float srcA, float srcB, int old, imm word_sel +def int_amdgcn_cvt_pk_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_pk_fp8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i1_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.sr.bf8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +// byte_sel selects byte to write into vdst. 
+def int_amdgcn_cvt_sr_bf8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_bf8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + +// llvm.amdgcn.cvt.sr.fp8.f32 int vdst, float srcA, int srcB, int old, imm byte_sel [0..3] +def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrWillReturn, ImmArg<ArgIndex<3>>]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 14c628595d30..2cdd75f82962 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -20,7 +20,7 @@ let TargetPrefix = "spv" in { def int_spv_gep : Intrinsic<[llvm_anyptr_ty], [llvm_i1_ty, llvm_any_ty, llvm_vararg_ty], [ImmArg<ArgIndex<0>>]>; def int_spv_load : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; - def int_spv_store : Intrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; + def int_spv_store : Intrinsic<[], [llvm_any_ty, llvm_anyptr_ty, llvm_i16_ty, llvm_i8_ty], [ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; def int_spv_extractv : Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_vararg_ty]>; def int_spv_insertv : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_any_ty, llvm_vararg_ty]>; def int_spv_extractelt : Intrinsic<[llvm_any_ty], [llvm_any_ty, llvm_anyint_ty]>; @@ -28,4 +28,5 @@ let TargetPrefix = "spv" in { def int_spv_const_composite : Intrinsic<[llvm_i32_ty], [llvm_vararg_ty]>; def int_spv_bitcast : 
Intrinsic<[llvm_any_ty], [llvm_any_ty]>; def int_spv_switch : Intrinsic<[], [llvm_any_ty, llvm_vararg_ty]>; + def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>; } diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h index ec769ce95160..b90b895f32e8 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Metadata.h @@ -1287,7 +1287,7 @@ public: return const_cast<MDNode *>(this)->mutable_end(); } - op_range operands() const { return op_range(op_begin(), op_end()); } + ArrayRef<MDOperand> operands() const { return getHeader().operands(); } const MDOperand &getOperand(unsigned I) const { assert(I < getNumOperands() && "Out of range"); @@ -1345,7 +1345,9 @@ class MDTuple : public MDNode { StorageType Storage, bool ShouldCreate = true); TempMDTuple cloneImpl() const { - return getTemporary(getContext(), SmallVector<Metadata *, 4>(operands())); + ArrayRef<MDOperand> Operands = operands(); + return getTemporary(getContext(), SmallVector<Metadata *, 4>( + Operands.begin(), Operands.end())); } public: diff --git a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h index 7f0695b552e1..31ff63c8b660 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/PatternMatch.h @@ -153,10 +153,16 @@ inline class_match<ConstantFP> m_ConstantFP() { return class_match<ConstantFP>(); } -/// Match an arbitrary ConstantExpr and ignore it. -inline class_match<ConstantExpr> m_ConstantExpr() { - return class_match<ConstantExpr>(); -} +struct constantexpr_match { + template <typename ITy> bool match(ITy *V) { + auto *C = dyn_cast<Constant>(V); + return C && (isa<ConstantExpr>(C) || C->containsConstantExpression()); + } +}; + +/// Match a constant expression or a constant that contains a constant +/// expression. 
+inline constantexpr_match m_ConstantExpr() { return constantexpr_match(); } /// Match an arbitrary basic block value and ignore it. inline class_match<BasicBlock> m_BasicBlock() { @@ -741,14 +747,14 @@ inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { /// Match an arbitrary immediate Constant and ignore it. inline match_combine_and<class_match<Constant>, - match_unless<class_match<ConstantExpr>>> + match_unless<constantexpr_match>> m_ImmConstant() { return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr())); } /// Match an immediate Constant, capturing the value if we match. inline match_combine_and<bind_ty<Constant>, - match_unless<class_match<ConstantExpr>>> + match_unless<constantexpr_match>> m_ImmConstant(Constant *&C) { return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); } diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h index ba8ffbbaf397..559840a33cfd 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Statepoint.h @@ -54,7 +54,6 @@ enum class StatepointFlags { // These two are defined in IntrinsicInst since they're part of the // IntrinsicInst class hierarchy. class GCRelocateInst; -class GCResultInst; /// Represents a gc.statepoint intrinsic call. 
This extends directly from /// CallBase as the IntrinsicInst only supports calls and gc.statepoint is diff --git a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h index 77f2c6330788..8cf31c08dff8 100644 --- a/contrib/llvm-project/llvm/include/llvm/InitializePasses.h +++ b/contrib/llvm-project/llvm/include/llvm/InitializePasses.h @@ -88,7 +88,6 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); void initializeBreakFalseDepsPass(PassRegistry&); -void initializeCanonicalizeAliasesLegacyPassPass(PassRegistry &); void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); @@ -101,7 +100,6 @@ void initializeCFIFixupPass(PassRegistry&); void initializeCFIInstrInserterPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); -void initializeCGProfileLegacyPassPass(PassRegistry &); void initializeCallGraphDOTPrinterPass(PassRegistry&); void initializeCallGraphPrinterLegacyPassPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); @@ -113,7 +111,6 @@ void initializeCodeGenPreparePass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstraintEliminationPass(PassRegistry &); -void initializeControlHeightReductionLegacyPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); @@ -148,7 +145,6 @@ void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void 
initializeEHContGuardCatchretPass(PassRegistry &); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); -void initializeEntryExitInstrumenterPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); @@ -165,7 +161,6 @@ void initializeFloat2IntLegacyPassPass(PassRegistry&); void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeForwardControlFlowIntegrityPass(PassRegistry&); void initializeFuncletLayoutPass(PassRegistry&); -void initializeFunctionImportLegacyPassPass(PassRegistry&); void initializeFunctionSpecializationLegacyPassPass(PassRegistry &); void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); @@ -200,8 +195,6 @@ void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstCountLegacyPassPass(PassRegistry &); void initializeInstNamerPass(PassRegistry&); void initializeInstSimplifyLegacyPassPass(PassRegistry &); -void initializeInstrProfilingLegacyPassPass(PassRegistry&); -void initializeInstrOrderFileLegacyPassPass(PassRegistry&); void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstructionSelectPass(PassRegistry&); void initializeInterleavedAccessPass(PassRegistry&); @@ -273,7 +266,6 @@ void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchLegacyPassPass(PassRegistry &); -void initializeLowerTypeTestsPass(PassRegistry&); void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &); void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &); void initializeMIRAddFSDiscriminatorsPass(PassRegistry &); @@ -321,7 +313,6 @@ void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); void initializeModuloScheduleTestPass(PassRegistry&); void 
initializeMustExecutePrinterPass(PassRegistry&); void initializeMustBeExecutedContextPrinterPass(PassRegistry&); -void initializeNameAnonGlobalLegacyPassPass(PassRegistry&); void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); @@ -347,7 +338,6 @@ void initializePostDomOnlyViewerWrapperPassPass(PassRegistry &); void initializePostDomPrinterWrapperPassPass(PassRegistry &); void initializePostDomViewerWrapperPassPass(PassRegistry &); void initializePostDominatorTreeWrapperPassPass(PassRegistry&); -void initializePostInlineEntryExitInstrumenterPass(PassRegistry&); void initializePostMachineSchedulerPass(PassRegistry&); void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializePostRAHazardRecognizerPass(PassRegistry&); @@ -393,9 +383,7 @@ void initializeSLPVectorizerPass(PassRegistry&); void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); -void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSelectOptimizePass(PassRegistry &); -void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); void initializeScalarizerLegacyPassPass(PassRegistry&); @@ -448,7 +436,6 @@ void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); void initializeWarnMissedTransformationsLegacyPass(PassRegistry &); void initializeWasmEHPreparePass(PassRegistry&); -void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); void initializeWriteThinLTOBitcodePass(PassRegistry&); diff --git a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h 
b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h index af5926dcb38b..e660ea05ddcf 100644 --- a/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h +++ b/contrib/llvm-project/llvm/include/llvm/LinkAllPasses.h @@ -91,7 +91,6 @@ namespace { (void) llvm::createLibCallsShrinkWrapPass(); (void) llvm::createCalledValuePropagationPass(); (void) llvm::createConstantMergePass(); - (void) llvm::createControlHeightReductionLegacyPass(); (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); @@ -101,8 +100,6 @@ namespace { (void) llvm::createDomPrinterWrapperPassPass(); (void) llvm::createDomOnlyViewerWrapperPassPass(); (void) llvm::createDomViewerWrapperPassPass(); - (void) llvm::createInstrProfilingLegacyPass(); - (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerLegacyPass(); (void) llvm::createGlobalDCEPass(); @@ -177,8 +174,6 @@ namespace { (void) llvm::createInstCountPass(); (void) llvm::createConstantHoistingPass(); (void) llvm::createCodeGenPreparePass(); - (void) llvm::createEntryExitInstrumenterPass(); - (void) llvm::createPostInlineEntryExitInstrumenterPass(); (void) llvm::createEarlyCSEPass(); (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h index a0e18891ed90..61520c4f29bf 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h @@ -603,8 +603,6 @@ public: const MCSymbolELF *Group, const MCSectionELF *RelInfoSection); - void renameELFSection(MCSectionELF *Section, StringRef Name); - MCSectionELF *createELFGroupSection(const MCSymbolELF *Group, bool IsComdat); void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, diff --git 
a/contrib/llvm-project/llvm/include/llvm/MC/MCDXContainerStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCDXContainerStreamer.h index ef1a95f71778..ac2fbc6cdff3 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCDXContainerStreamer.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCDXContainerStreamer.h @@ -22,8 +22,6 @@ #include "llvm/MC/MCObjectWriter.h" namespace llvm { -class MCAssembler; -class MCExpr; class MCInst; class raw_ostream; diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h index 149373dd2b54..15e4652bc05d 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -263,9 +263,9 @@ public: const MCFragment &FB, bool InSet, bool IsPCRel) const override; - uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + void populateAddrSigSection(MCAssembler &Asm); - void writeAddrsigSection(MCAssembler &Asm); + uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; /// Construct a new Mach-O writer instance. 
diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCSPIRVStreamer.h b/contrib/llvm-project/llvm/include/llvm/MC/MCSPIRVStreamer.h index 7366e0a9d82c..fc1e41c636d8 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCSPIRVStreamer.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCSPIRVStreamer.h @@ -19,8 +19,6 @@ #include "llvm/MC/MCObjectWriter.h" namespace llvm { -class MCAssembler; -class MCExpr; class MCInst; class raw_ostream; diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h index 9c906cdc90d0..ae305564a353 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCTargetOptions.h @@ -27,7 +27,6 @@ enum class ExceptionHandling { enum class DebugCompressionType { None, ///< No compression - GNU, ///< zlib-gnu style compression Z, ///< zlib style complession }; diff --git a/contrib/llvm-project/llvm/include/llvm/Object/Archive.h b/contrib/llvm-project/llvm/include/llvm/Object/Archive.h index a36c9bd6163b..63f0df85054e 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/Archive.h +++ b/contrib/llvm-project/llvm/include/llvm/Object/Archive.h @@ -379,10 +379,10 @@ protected: uint64_t getArchiveMagicLen() const; void setFirstRegular(const Child &C); -private: StringRef SymbolTable; StringRef StringTable; +private: StringRef FirstRegularData; uint16_t FirstRegularStartOfFile = -1; diff --git a/contrib/llvm-project/llvm/include/llvm/Object/DXContainer.h b/contrib/llvm-project/llvm/include/llvm/Object/DXContainer.h index 7aa7d8ecf4c7..9ec5b5e6b1b2 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/DXContainer.h +++ b/contrib/llvm-project/llvm/include/llvm/Object/DXContainer.h @@ -121,4 +121,4 @@ public: } // namespace object } // namespace llvm -#endif // LLVM_OBJECT_DXCONTAINERFILE_H +#endif // LLVM_OBJECT_DXCONTAINER_H diff --git a/contrib/llvm-project/llvm/include/llvm/Object/Decompressor.h 
b/contrib/llvm-project/llvm/include/llvm/Object/Decompressor.h index 00b6c2016742..35f4ebe2e5d6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/Decompressor.h +++ b/contrib/llvm-project/llvm/include/llvm/Object/Decompressor.h @@ -16,8 +16,6 @@ namespace llvm { namespace object { -class SectionRef; - /// Decompressor helps to handle decompression of compressed sections. class Decompressor { public: @@ -43,19 +41,9 @@ public: /// Return memory buffer size required for decompression. uint64_t getDecompressedSize() { return DecompressedSize; } - /// Return true if section is compressed, including gnu-styled case. - static bool isCompressed(const object::SectionRef &Section); - - /// Return true if section is a ELF compressed one. - static bool isCompressedELFSection(uint64_t Flags, StringRef Name); - - /// Return true if section name matches gnu style compressed one. - static bool isGnuStyle(StringRef Name); - private: Decompressor(StringRef Data); - Error consumeCompressedGnuHeader(); Error consumeCompressedZLibHeader(bool Is64Bit, bool IsLittleEndian); StringRef SectionData; diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h index 1a59ba94098f..794d29fd9913 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/ELF.h +++ b/contrib/llvm-project/llvm/include/llvm/Object/ELF.h @@ -181,6 +181,7 @@ public: private: StringRef Buf; + std::vector<Elf_Shdr> FakeSections; ELFFile(StringRef Object); @@ -389,6 +390,8 @@ public: Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const; Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const; Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const; + + void createFakeSections(); }; using ELF32LEFile = ELFFile<ELF32LE>; @@ -757,11 +760,37 @@ Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) { return ELFFile(Object); } +/// Used by llvm-objdump -d (which needs sections for 
disassembly) to +/// disassemble objects without a section header table (e.g. ET_CORE objects +/// analyzed by linux perf or ET_EXEC with llvm-strip --strip-sections). +template <class ELFT> void ELFFile<ELFT>::createFakeSections() { + if (!FakeSections.empty()) + return; + auto PhdrsOrErr = program_headers(); + if (!PhdrsOrErr) + return; + + for (auto Phdr : *PhdrsOrErr) { + if (!(Phdr.p_type & ELF::PT_LOAD) || !(Phdr.p_flags & ELF::PF_X)) + continue; + Elf_Shdr FakeShdr = {}; + FakeShdr.sh_type = ELF::SHT_PROGBITS; + FakeShdr.sh_flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; + FakeShdr.sh_addr = Phdr.p_vaddr; + FakeShdr.sh_size = Phdr.p_memsz; + FakeShdr.sh_offset = Phdr.p_offset; + FakeSections.push_back(FakeShdr); + } +} + template <class ELFT> Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { const uintX_t SectionTableOffset = getHeader().e_shoff; - if (SectionTableOffset == 0) + if (SectionTableOffset == 0) { + if (!FakeSections.empty()) + return makeArrayRef(FakeSections.data(), FakeSections.size()); return ArrayRef<Elf_Shdr>(); + } if (getHeader().e_shentsize != sizeof(Elf_Shdr)) return createError("invalid e_shentsize in ELF header: " + diff --git a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h index c449a3dafc0c..ed2f70b0da25 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h +++ b/contrib/llvm-project/llvm/include/llvm/Object/ELFObjectFile.h @@ -457,6 +457,8 @@ public: elf_symbol_iterator_range getDynamicSymbolIterators() const override; bool isRelocatableObject() const override; + + void createFakeSections() { EF.createFakeSections(); } }; using ELF32LEObjectFile = ELFObjectFile<ELF32LE>; diff --git a/contrib/llvm-project/llvm/include/llvm/Object/OffloadBinary.h b/contrib/llvm-project/llvm/include/llvm/Object/OffloadBinary.h index 5afc3ed295ae..4bff91c4c930 100644 --- a/contrib/llvm-project/llvm/include/llvm/Object/OffloadBinary.h +++ 
b/contrib/llvm-project/llvm/include/llvm/Object/OffloadBinary.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_BINARYFORMAT_OFFLOADING_H -#define LLVM_BINARYFORMAT_OFFLOADING_H +#ifndef LLVM_OBJECT_OFFLOADBINARY_H +#define LLVM_OBJECT_OFFLOADBINARY_H #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -80,7 +80,7 @@ public: /// Serialize the contents of \p File to a binary buffer to be read later. static std::unique_ptr<MemoryBuffer> write(const OffloadingImage &); - static uint64_t getAlignment() { return alignof(Header); } + static uint64_t getAlignment() { return 8; } ImageKind getImageKind() const { return TheEntry->TheImageKind; } OffloadKind getOffloadKind() const { return TheEntry->TheOffloadKind; } diff --git a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/OffloadYAML.h b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/OffloadYAML.h index a4fdbce0b320..fc7a8cc7e78e 100644 --- a/contrib/llvm-project/llvm/include/llvm/ObjectYAML/OffloadYAML.h +++ b/contrib/llvm-project/llvm/include/llvm/ObjectYAML/OffloadYAML.h @@ -76,4 +76,4 @@ template <> struct MappingTraits<OffloadYAML::Binary::Member> { } // end namespace yaml } // end namespace llvm -#endif // LLVM_OBJECTYAML_ARCHIVEYAML_H +#endif // LLVM_OBJECTYAML_OFFLOADYAML_H diff --git a/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h b/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h index 32ecc9ec5fb0..30287cde5de7 100644 --- a/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/contrib/llvm-project/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -133,9 +133,9 @@ public: } bool isPoisoned() const { - return BBGuards && - std::any_of(BBGuards->begin(), BBGuards->end(), - [](const auto &BB) { return BB.second.isPoisoned(); }); + return BBGuards && llvm::any_of(*BBGuards, [](const auto &BB) { + return BB.second.isPoisoned(); + }); } static void 
printDiff(raw_ostream &out, const CFG &Before, diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h index 3a25de05bbf1..1d1b59bb6c46 100644 --- a/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -619,9 +619,14 @@ public: /// Read a single record. Error readNextRecord(NamedInstrProfRecord &Record) override; - /// Return the NamedInstrProfRecord associated with FuncName and FuncHash - Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName, - uint64_t FuncHash); + /// Return the NamedInstrProfRecord associated with FuncName and FuncHash. + /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr, + /// the sum of all counters in the mismatched function will be set to + /// MismatchedFuncSum. If there are multiple instances of mismatched + /// functions, MismatchedFuncSum returns the maximum. + Expected<InstrProfRecord> + getInstrProfRecord(StringRef FuncName, uint64_t FuncHash, + uint64_t *MismatchedFuncSum = nullptr); /// Return the memprof record for the function identified by /// llvm::md5(Name). diff --git a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h index f11392c05318..1ad83c2f5b5a 100644 --- a/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h +++ b/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h @@ -874,16 +874,20 @@ public: /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of branch samples that have the function as the - /// branch target. This should be equivalent to the sample of the first - /// instruction of the symbol. 
But as we directly get this info for raw - /// profile without referring to potentially inaccurate debug info, this + /// For top-level functions, return the total number of branch samples that + /// have the function as the branch target (or 0 otherwise). This is the raw + /// data fetched from the profile. This should be equivalent to the sample of + /// the first instruction of the symbol. But as we directly get this info for + /// raw profile without referring to potentially inaccurate debug info, this /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } - /// Return the sample count of the first instruction of the function. + /// Return an estimate of the sample count of the function entry basic block. /// The function can be either a standalone symbol or an inlined function. - uint64_t getEntrySamples() const { + /// For Context-Sensitive profiles, this will prefer returning the head + /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from + /// the function body's samples or callsite samples. + uint64_t getHeadSamplesEstimate() const { if (FunctionSamples::ProfileIsCS && getHeadSamples()) { // For CS profile, if we already have more accurate head samples // counted by branch sample from caller, use them as entry samples. @@ -900,7 +904,7 @@ public: // An indirect callsite may be promoted to several inlined direct calls. // We need to get the sum of them. for (const auto &N_FS : CallsiteSamples.begin()->second) - Count += N_FS.second.getEntrySamples(); + Count += N_FS.second.getHeadSamplesEstimate(); } // Return at least 1 if total sample is not 0. return Count ? 
Count : TotalSamples > 0; diff --git a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 41d144cfd5c4..61b05743faf6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -161,7 +161,8 @@ enum : int32_t { KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ - KERNEL_CODE_PROPERTY(RESERVED1, 11, 5), + KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1), + KERNEL_CODE_PROPERTY(RESERVED1, 12, 4), }; #undef KERNEL_CODE_PROPERTY diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Casting.h b/contrib/llvm-project/llvm/include/llvm/Support/Casting.h index 5444d777b749..b6bbff8ada10 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/Casting.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/Casting.h @@ -265,7 +265,7 @@ struct CastIsPossible { template <typename To, typename From> struct CastIsPossible<To, Optional<From>> { static inline bool isPossible(const Optional<From> &f) { - assert(f.hasValue() && "CastIsPossible::isPossible called on a nullopt!"); + assert(f && "CastIsPossible::isPossible called on a nullopt!"); return isa_impl_wrap< To, const From, typename simplify_type<const From>::SimpleType>::doit(*f); diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Compression.h b/contrib/llvm-project/llvm/include/llvm/Support/Compression.h index c99f811459ab..8500396d88a0 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/Compression.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/Compression.h @@ -19,7 +19,6 @@ namespace llvm { template <typename T> class SmallVectorImpl; class Error; -class StringRef; namespace compression { namespace zlib { @@ -44,6 +43,28 @@ Error uncompress(ArrayRef<uint8_t> Input, } // End 
of namespace zlib +namespace zstd { + +constexpr int NoCompression = -5; +constexpr int BestSpeedCompression = 1; +constexpr int DefaultCompression = 5; +constexpr int BestSizeCompression = 12; + +bool isAvailable(); + +void compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, + int Level = DefaultCompression); + +Error uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize); + +Error uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize); + +} // End of namespace zstd + } // End of namespace compression } // End of namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/Support/DivisionByConstantInfo.h b/contrib/llvm-project/llvm/include/llvm/Support/DivisionByConstantInfo.h index 896bc679885e..7d01613ce1c6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/DivisionByConstantInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/DivisionByConstantInfo.h @@ -1,4 +1,4 @@ -//== llvm/Support/DivisonByConstantInfo.h - division by constant -*- C++ -*-==// +//===- llvm/Support/DivisionByConstantInfo.h ---------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -25,9 +25,9 @@ struct SignedDivisionByConstantInfo { }; /// Magic data for optimising unsigned division by a constant. 
-struct UnsignedDivisonByConstantInfo { - static UnsignedDivisonByConstantInfo get(const APInt &D, - unsigned LeadingZeros = 0); +struct UnsignedDivisionByConstantInfo { + static UnsignedDivisionByConstantInfo get(const APInt &D, + unsigned LeadingZeros = 0); APInt Magic; ///< magic number bool IsAdd; ///< add indicator unsigned ShiftAmount; ///< shift amount diff --git a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h index 719e8b60d0fa..0a44aabedae6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/JSON.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/JSON.h @@ -169,44 +169,36 @@ public: emplace_back(V); } - Value &operator[](size_t I) { return V[I]; } - const Value &operator[](size_t I) const { return V[I]; } - Value &front() { return V.front(); } - const Value &front() const { return V.front(); } - Value &back() { return V.back(); } - const Value &back() const { return V.back(); } - Value *data() { return V.data(); } - const Value *data() const { return V.data(); } - - iterator begin() { return V.begin(); } - const_iterator begin() const { return V.begin(); } - iterator end() { return V.end(); } - const_iterator end() const { return V.end(); } - - bool empty() const { return V.empty(); } - size_t size() const { return V.size(); } - void reserve(size_t S) { V.reserve(S); } - - void clear() { V.clear(); } - void push_back(const Value &E) { V.push_back(E); } - void push_back(Value &&E) { V.push_back(std::move(E)); } - template <typename... Args> void emplace_back(Args &&... 
A) { - V.emplace_back(std::forward<Args>(A)...); - } - void pop_back() { V.pop_back(); } + Value &operator[](size_t I); + const Value &operator[](size_t I) const; + Value &front(); + const Value &front() const; + Value &back(); + const Value &back() const; + Value *data(); + const Value *data() const; + + iterator begin(); + const_iterator begin() const; + iterator end(); + const_iterator end() const; + + bool empty() const; + size_t size() const; + void reserve(size_t S); + + void clear(); + void push_back(const Value &E); + void push_back(Value &&E); + template <typename... Args> void emplace_back(Args &&...A); + void pop_back(); // FIXME: insert() takes const_iterator since C++11, old libstdc++ disagrees. - iterator insert(iterator P, const Value &E) { return V.insert(P, E); } - iterator insert(iterator P, Value &&E) { - return V.insert(P, std::move(E)); - } - template <typename It> iterator insert(iterator P, It A, It Z) { - return V.insert(P, A, Z); - } - template <typename... Args> iterator emplace(const_iterator P, Args &&... A) { - return V.emplace(P, std::forward<Args>(A)...); - } + iterator insert(iterator P, const Value &E); + iterator insert(iterator P, Value &&E); + template <typename It> iterator insert(iterator P, It A, It Z); + template <typename... 
Args> iterator emplace(const_iterator P, Args &&...A); - friend bool operator==(const Array &L, const Array &R) { return L.V == R.V; } + friend bool operator==(const Array &L, const Array &R); }; inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } @@ -515,6 +507,48 @@ private: bool operator==(const Value &, const Value &); inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } +// Array Methods +inline Value &Array::operator[](size_t I) { return V[I]; } +inline const Value &Array::operator[](size_t I) const { return V[I]; } +inline Value &Array::front() { return V.front(); } +inline const Value &Array::front() const { return V.front(); } +inline Value &Array::back() { return V.back(); } +inline const Value &Array::back() const { return V.back(); } +inline Value *Array::data() { return V.data(); } +inline const Value *Array::data() const { return V.data(); } + +inline typename Array::iterator Array::begin() { return V.begin(); } +inline typename Array::const_iterator Array::begin() const { return V.begin(); } +inline typename Array::iterator Array::end() { return V.end(); } +inline typename Array::const_iterator Array::end() const { return V.end(); } + +inline bool Array::empty() const { return V.empty(); } +inline size_t Array::size() const { return V.size(); } +inline void Array::reserve(size_t S) { V.reserve(S); } + +inline void Array::clear() { V.clear(); } +inline void Array::push_back(const Value &E) { V.push_back(E); } +inline void Array::push_back(Value &&E) { V.push_back(std::move(E)); } +template <typename... 
Args> inline void Array::emplace_back(Args &&...A) { + V.emplace_back(std::forward<Args>(A)...); +} +inline void Array::pop_back() { V.pop_back(); } +inline typename Array::iterator Array::insert(iterator P, const Value &E) { + return V.insert(P, E); +} +inline typename Array::iterator Array::insert(iterator P, Value &&E) { + return V.insert(P, std::move(E)); +} +template <typename It> +inline typename Array::iterator Array::insert(iterator P, It A, It Z) { + return V.insert(P, A, Z); +} +template <typename... Args> +inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { + return V.emplace(P, std::forward<Args>(A)...); +} +inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } + /// ObjectKey is a used to capture keys in Object. Like Value but: /// - only strings are allowed /// - it's optimized for the string literal case (Owned == nullptr) diff --git a/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h b/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h index d022a8f53706..0d56c4b9912d 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/SpecialCaseList.h @@ -19,9 +19,9 @@ // prefix:wildcard_expression[=category] // If category is not specified, it is assumed to be empty string. // Definitions of "prefix" and "category" are sanitizer-specific. For example, -// sanitizer exclusion support prefixes "src", "fun" and "global". -// Wildcard expressions define, respectively, source files, functions or -// globals which shouldn't be instrumented. +// sanitizer exclusion support prefixes "src", "mainfile", "fun" and "global". +// Wildcard expressions define, respectively, source files, main files, +// functions or globals which shouldn't be instrumented. // Examples of categories: // "functional": used in DFSan to list functions with pure functional // semantics. 
@@ -37,6 +37,7 @@ // type:*Namespace::ClassName*=init // src:file_with_tricky_code.cc // src:ignore-global-initializers-issues.cc=init +// mainfile:main_file.cc // // [dataflow] // # Functions with pure functional semantics: diff --git a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h index 3c99b0d8efdb..6844a406f38c 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/VirtualFileSystem.h @@ -462,7 +462,6 @@ private: namespace detail { class InMemoryDirectory; -class InMemoryFile; class InMemoryNode; struct NewInMemoryNodeInfo { diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h b/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h index d73b9ae49235..e85c13f4b7cc 100644 --- a/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/contrib/llvm-project/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -174,6 +174,16 @@ public: } bool isImplicit() const { return Def->getValueAsBit("isImplicit"); } + + std::vector<StringRef> getAliases() const { + return Def->getValueAsListOfStrings("aliases"); + } + + StringRef getPrefix() const { return Def->getValueAsString("prefix"); } + + bool isPrefixOptional() const { + return Def->getValueAsBit("isPrefixOptional"); + } }; // Wrapper class that contains VersionedClause's information defined in diff --git a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h index 44daad976c12..50df38e695d7 100644 --- a/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h +++ b/contrib/llvm-project/llvm/include/llvm/TableGen/Record.h @@ -1558,6 +1558,7 @@ private: // Location where record was instantiated, followed by the location of // multiclass prototypes used. 
SmallVector<SMLoc, 4> Locs; + SmallVector<SMLoc, 0> ForwardDeclarationLocs; SmallVector<Init *, 0> TemplateArgs; SmallVector<RecordVal, 0> Values; SmallVector<AssertionInfo, 0> Assertions; @@ -1614,7 +1615,7 @@ public: return Name; } - const std::string getNameInitAsString() const { + std::string getNameInitAsString() const { return getNameInit()->getAsUnquotedString(); } @@ -1623,6 +1624,13 @@ public: ArrayRef<SMLoc> getLoc() const { return Locs; } void appendLoc(SMLoc Loc) { Locs.push_back(Loc); } + ArrayRef<SMLoc> getForwardDeclarationLocs() const { + return ForwardDeclarationLocs; + } + + // Update a class location when encountering a (re-)definition. + void updateClassLoc(SMLoc Loc); + // Make the type that this record should have based on its superclasses. RecordRecTy *getType(); diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td index a719581e0ac3..18b7ff4aec95 100644 --- a/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td +++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetCallingConv.td @@ -141,6 +141,15 @@ class CCAssignToStackWithShadow<int size, list<Register> ShadowRegList = shadowList; } +/// CCAssignToRegAndStack - Same as CCAssignToReg, but also allocates a stack +/// slot, when some register is used. Basically, it works like: +/// CCIf<CCAssignToReg<regList>, CCAssignToStack<size, align>>. +class CCAssignToRegAndStack<list<Register> regList, int size, int align> + : CCAssignToReg<regList> { + int Size = size; + int Align = align; +} + /// CCPassByVal - This action always matches: it assigns the value to a stack /// slot to implement ByVal aggregate parameter passing. Size and alignment /// specify the minimum size and alignment for the stack slot. 
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td index 171fdb1b98e0..bce8f2b72ec3 100644 --- a/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -705,6 +705,9 @@ def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, []>; +def vector_insert_subvec : SDNode<"ISD::INSERT_SUBVECTOR", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisInt<3>]>, + []>; // This operator does subvector type checking. def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>; diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h index 6b7d4f4821f0..0b0f30be4dc9 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO.h @@ -98,10 +98,6 @@ ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool deleteFn = false, bool keepConstInit = false); //===----------------------------------------------------------------------===// -/// This pass performs iterative function importing from other modules. -Pass *createFunctionImportPass(); - -//===----------------------------------------------------------------------===// /// createFunctionInliningPass - Return a new pass object that uses a heuristic /// to inline direct function calls to small functions. /// @@ -239,49 +235,13 @@ enum class PassSummaryAction { Export, ///< Export information to summary. }; -/// This pass lowers type metadata and the llvm.type.test intrinsic to -/// bitsets. 
-/// -/// The behavior depends on the summary arguments: -/// - If ExportSummary is non-null, this pass will export type identifiers to -/// the given summary. -/// - If ImportSummary is non-null, this pass will import type identifiers from -/// the given summary. -/// - Otherwise, if both are null and DropTypeTests is true, all type test -/// assume sequences will be removed from the IR. -/// It is invalid for both ExportSummary and ImportSummary to be non-null -/// unless DropTypeTests is true. -ModulePass *createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, - bool DropTypeTests = false); - /// This pass export CFI checks for use by external modules. ModulePass *createCrossDSOCFIPass(); -/// This pass implements whole-program devirtualization using type -/// metadata. -/// -/// The behavior depends on the summary arguments: -/// - If ExportSummary is non-null, this pass will export type identifiers to -/// the given summary. -/// - Otherwise, if ImportSummary is non-null, this pass will import type -/// identifiers from the given summary. -/// - Otherwise it does neither. -/// It is invalid for both ExportSummary and ImportSummary to be non-null. -ModulePass * -createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary); - /// This pass splits globals into pieces for the benefit of whole-program /// devirtualization and control-flow integrity. ModulePass *createGlobalSplitPass(); -//===----------------------------------------------------------------------===// -// SampleProfilePass - Loads sample profile data from disk and generates -// IR metadata to reflect the profile. -ModulePass *createSampleProfileLoaderPass(); -ModulePass *createSampleProfileLoaderPass(StringRef Name); - /// Write ThinLTO-ready bitcode to Str. 
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str, raw_ostream *ThinLinkOS = nullptr); diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h index 17e29695ab73..8466f5612d99 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -118,7 +118,9 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" @@ -155,6 +157,7 @@ namespace AA { enum ValueScope : uint8_t { Intraprocedural = 1, Interprocedural = 2, + AnyScope = Intraprocedural | Interprocedural, }; struct ValueAndContext : public std::pair<Value *, const Instruction *> { @@ -217,12 +220,11 @@ Constant *getInitialValueForObj(Value &Obj, Type &Ty, /// \returns True if \p Objects contains all assumed underlying objects, and /// false if something went wrong and the objects could not be /// determined. -bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, - SmallVectorImpl<Value *> &Objects, - const AbstractAttribute &QueryingAA, - const Instruction *CtxI, - bool &UsedAssumedInformation, - AA::ValueScope VS = Interprocedural); +bool getAssumedUnderlyingObjects( + Attributor &A, const Value &Ptr, SmallSetVector<Value *, 8> &Objects, + const AbstractAttribute &QueryingAA, const Instruction *CtxI, + bool &UsedAssumedInformation, AA::ValueScope VS = AA::Interprocedural, + SmallPtrSetImpl<Value *> *SeenObjects = nullptr); /// Collect all potential values \p LI could read into \p PotentialValues. 
That /// is, the only values read by \p LI are assumed to be known and all are in @@ -305,6 +307,24 @@ struct DenseMapInfo<AA::ValueAndContext> } }; +template <> +struct DenseMapInfo<AA::ValueScope> : public DenseMapInfo<unsigned char> { + using Base = DenseMapInfo<unsigned char>; + static inline AA::ValueScope getEmptyKey() { + return AA::ValueScope(Base::getEmptyKey()); + } + static inline AA::ValueScope getTombstoneKey() { + return AA::ValueScope(Base::getTombstoneKey()); + } + static unsigned getHashValue(const AA::ValueScope &S) { + return Base::getHashValue(S); + } + + static bool isEqual(const AA::ValueScope &LHS, const AA::ValueScope &RHS) { + return Base::isEqual(LHS, RHS); + } +}; + /// The value passed to the line option that defines the maximal initialization /// chain length. extern unsigned MaxInitializationChainLength; @@ -1643,8 +1663,6 @@ struct Attributor { /// Record that \p F is deleted after information was manifested. void deleteAfterManifest(Function &F) { - errs() << "Delete " << F.getName() << " : " << (Configuration.DeleteFns) - << "\n"; if (Configuration.DeleteFns) ToBeDeletedFunctions.insert(&F); } @@ -1664,14 +1682,16 @@ struct Attributor { /// return None, otherwise return `nullptr`. 
Optional<Value *> getAssumedSimplified(const IRPosition &IRP, const AbstractAttribute &AA, - bool &UsedAssumedInformation) { - return getAssumedSimplified(IRP, &AA, UsedAssumedInformation); + bool &UsedAssumedInformation, + AA::ValueScope S) { + return getAssumedSimplified(IRP, &AA, UsedAssumedInformation, S); } Optional<Value *> getAssumedSimplified(const Value &V, const AbstractAttribute &AA, - bool &UsedAssumedInformation) { + bool &UsedAssumedInformation, + AA::ValueScope S) { return getAssumedSimplified(IRPosition::value(V), AA, - UsedAssumedInformation); + UsedAssumedInformation, S); } /// If \p V is assumed simplified, return it, if it is unclear yet, @@ -1679,7 +1699,17 @@ struct Attributor { /// except that it can be used without recording dependences on any \p AA. Optional<Value *> getAssumedSimplified(const IRPosition &V, const AbstractAttribute *AA, - bool &UsedAssumedInformation); + bool &UsedAssumedInformation, + AA::ValueScope S); + + /// Try to simplify \p IRP and in the scope \p S. If successful, true is + /// returned and all potential values \p IRP can take are put into \p Values. + /// If false is returned no other information is valid. + bool getAssumedSimplifiedValues(const IRPosition &IRP, + const AbstractAttribute *AA, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope S, + bool &UsedAssumedInformation); /// Register \p CB as a simplification callback. /// `Attributor::getAssumedSimplified` will use these callbacks before @@ -4409,6 +4439,10 @@ template <typename MemberTy> struct PotentialValuesState : AbstractState { return *this; } + bool contains(const MemberTy &V) const { + return !isValidState() ? 
true : Set.contains(V); + } + protected: SetTy &getAssumedSet() { assert(isValidState() && "This set shoud not be used when it is invalid!"); @@ -4490,9 +4524,12 @@ private: }; using PotentialConstantIntValuesState = PotentialValuesState<APInt>; +using PotentialLLVMValuesState = + PotentialValuesState<std::pair<AA::ValueAndContext, AA::ValueScope>>; raw_ostream &operator<<(raw_ostream &OS, const PotentialConstantIntValuesState &R); +raw_ostream &operator<<(raw_ostream &OS, const PotentialLLVMValuesState &R); /// An abstract interface for potential values analysis. /// @@ -4508,7 +4545,7 @@ raw_ostream &operator<<(raw_ostream &OS, /// 2. We tried to initialize on a Value that we cannot handle (e.g. an /// operator we do not currently handle). /// -/// TODO: Support values other than constant integers. +/// For non constant integers see AAPotentialValues. struct AAPotentialConstantValues : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> { using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>; @@ -4562,6 +4599,48 @@ struct AAPotentialConstantValues static const char ID; }; +struct AAPotentialValues + : public StateWrapper<PotentialLLVMValuesState, AbstractAttribute> { + using Base = StateWrapper<PotentialLLVMValuesState, AbstractAttribute>; + AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + /// See AbstractAttribute::getState(...). + PotentialLLVMValuesState &getState() override { return *this; } + const PotentialLLVMValuesState &getState() const override { return *this; } + + /// Create an abstract attribute view for the position \p IRP. + static AAPotentialValues &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Extract the single value in \p Values if any. 
+ static Value *getSingleValue(Attributor &A, const AbstractAttribute &AA, + const IRPosition &IRP, + SmallVectorImpl<AA::ValueAndContext> &Values); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAPotentialValues"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAPotentialValues + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; + +private: + virtual bool + getAssumedSimplifiedValues(Attributor &A, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope) const = 0; + + friend struct Attributor; +}; + /// An abstract interface for all noundef attributes. struct AANoUndef : public IRAttribute<Attribute::NoUndef, @@ -4801,8 +4880,7 @@ struct AAFunctionReachability /// Can \p Inst reach \p Fn. /// See also AA::isPotentiallyReachable. virtual bool instructionCanReach(Attributor &A, const Instruction &Inst, - const Function &Fn, - bool UseBackwards = true) const = 0; + const Function &Fn) const = 0; /// Create an abstract attribute view for the position \p IRP. static AAFunctionReachability &createForPosition(const IRPosition &IRP, @@ -4834,19 +4912,36 @@ struct AAPointerInfo : public AbstractAttribute { AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} enum AccessKind { - AK_READ = 1 << 0, - AK_WRITE = 1 << 1, - AK_READ_WRITE = AK_READ | AK_WRITE, + // First two bits to distinguish may and must accesses + AK_MUST = 1 << 0, + AK_MAY = 1 << 1, + + // Then two bits for read and write. These are not exclusive. + AK_R = 1 << 2, + AK_W = 1 << 3, + AK_RW = AK_R | AK_W, + + // Helper for easy access. 
+ AK_MAY_READ = AK_MAY | AK_R, + AK_MAY_WRITE = AK_MAY | AK_W, + AK_MAY_READ_WRITE = AK_MAY | AK_R | AK_W, + AK_MUST_READ = AK_MUST | AK_R, + AK_MUST_WRITE = AK_MUST | AK_W, + AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W, }; /// An access description. struct Access { Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {} + : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { + verify(); + } Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content, AccessKind Kind, Type *Ty) : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), - Ty(Ty) {} + Ty(Ty) { + verify(); + } Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), @@ -4867,14 +4962,22 @@ struct AAPointerInfo : public AbstractAttribute { return *this; } + void verify() { + assert(isMustAccess() + isMayAccess() == 1 && + "Expect must or may access, not both."); + } + /// Return the access kind. AccessKind getKind() const { return Kind; } /// Return true if this is a read access. - bool isRead() const { return Kind & AK_READ; } + bool isRead() const { return Kind & AK_R; } /// Return true if this is a write access. - bool isWrite() const { return Kind & AK_WRITE; } + bool isWrite() const { return Kind & AK_W; } + + bool isMustAccess() const { return Kind & AK_MUST; } + bool isMayAccess() const { return Kind & AK_MAY; } /// Return the instruction that causes the access with respect to the local /// scope of the associated attribute. @@ -4887,7 +4990,9 @@ struct AAPointerInfo : public AbstractAttribute { bool isWrittenValueYetUndetermined() const { return !Content; } /// Return true if the value written cannot be determined at all. 
- bool isWrittenValueUnknown() const { return Content && !*Content; } + bool isWrittenValueUnknown() const { + return Content.has_value() && !*Content; + } /// Return the type associated with the access, if known. Type *getType() const { return Ty; } @@ -4976,10 +5081,14 @@ struct AAPointerInfo : public AbstractAttribute { /// return true if all such accesses were known and the callback returned true /// for all of them, false otherwise. In contrast to forallInterferingAccesses /// this function will perform reasoning to exclude write accesses that cannot - /// affect the load even if they on the surface look as if they would. - virtual bool forallInterferingAccesses( - Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, - function_ref<bool(const Access &, bool)> CB) const = 0; + /// affect the load even if they on the surface look as if they would. The + /// flag \p HasBeenWrittenTo will be set to true if we know that \p I does not + /// read the intial value of the underlying memory. + virtual bool + forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA, + Instruction &I, + function_ref<bool(const Access &, bool)> CB, + bool &HasBeenWrittenTo) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index c8ea1f5b6624..2e882eeb8bac 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -164,26 +164,10 @@ public: bool VerifyInput; bool VerifyOutput; bool MergeFunctions; - bool PrepareForLTO; - bool PrepareForThinLTO; - bool PerformThinLTO; bool DivergentTarget; unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; - /// Enable profile instrumentation pass. 
- bool EnablePGOInstrGen; - /// Enable profile context sensitive instrumentation pass. - bool EnablePGOCSInstrGen; - /// Enable profile context sensitive profile use pass. - bool EnablePGOCSInstrUse; - /// Profile data file name that the instrumentation will be written to. - std::string PGOInstrGen; - /// Path of the profile data file. - std::string PGOInstrUse; - /// Path of the sample Profile data file. - std::string PGOSampleUse; - private: /// ExtensionList - This is list of all of the extensions that are registered. std::vector<std::pair<ExtensionPointTy, ExtensionFn>> Extensions; diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index fff06da22cf3..c41871e33eaf 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -105,7 +105,7 @@ public: if (!CalleeSamples || !CallerSamples) { Weight = 0; } else { - uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate(); uint64_t CallsiteCount = 0; LineLocation Callsite = Callee->getCallSiteLoc(); if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { @@ -169,7 +169,7 @@ private: for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); addProfiledCall(Samples.getFuncName(), InlinedSamples.first, - InlinedSamples.second.getEntrySamples()); + InlinedSamples.second.getHeadSamplesEstimate()); addProfiledCalls(InlinedSamples.second); } } diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index ae19fbfb49a7..87eeb8e020a6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ 
b/contrib/llvm-project/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -425,7 +425,7 @@ public: // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. if (&I == V) - V = UndefValue::get(I.getType()); + V = PoisonValue::get(I.getType()); LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n" << " with " << *V << '\n'); diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h index 9ff45fc29b06..0c688e3bdaf6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation.h @@ -28,7 +28,6 @@ namespace llvm { class Triple; -class FunctionPass; class ModulePass; class OptimizationRemarkEmitter; class Comdat; @@ -79,8 +78,6 @@ struct GCOVOptions { std::string Exclude; }; -ModulePass *createCGProfileLegacyPass(); - // The pgo-specific indirect call promotion function declared below is used by // the pgo-driven indirect call promotion and sample profile passes. It's a // wrapper around llvm::promoteCall, et al. that additionally computes !prof @@ -126,13 +123,6 @@ struct InstrProfOptions { InstrProfOptions() = default; }; -/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if -// this is the context sensitive instrumentation. 
-ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); - -ModulePass *createInstrOrderFilePass(); - // Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation ModulePass *createDataFlowSanitizerLegacyPassPass( const std::vector<std::string> &ABIListFiles = std::vector<std::string>()); diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index d12b2cf45825..f56ec6ff682f 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -17,12 +17,7 @@ #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" namespace llvm { -class Function; -class FunctionPass; -class GlobalVariable; -class MDNode; class Module; -class ModulePass; class raw_ostream; struct AddressSanitizerOptions { diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index d3b5b5ca5c25..11ea66780d8c 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -17,7 +17,6 @@ #include "llvm/IR/PassManager.h" namespace llvm { -class FunctionPass; class Module; class StringRef; class raw_ostream; diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index e4654a0fc7ef..ad92ed1c6330 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ 
-18,7 +18,6 @@ namespace llvm { class Function; -class FunctionPass; class Module; class StringRef; class raw_ostream; diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h index 9bacb7eb38a5..dfa85fde28e6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -8,8 +8,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares the SanitizerCoverage class which is a port of the legacy -// SanitizerCoverage pass to use the new PassManager infrastructure. +// SanitizerCoverage is a simple code coverage implementation. // //===----------------------------------------------------------------------===// @@ -23,7 +22,6 @@ namespace llvm { class Module; -class ModulePass; /// This is the ModuleSanitizerCoverage pass used in the new pass manager. The /// pass instruments functions for coverage, adds initialization calls to the @@ -56,13 +54,6 @@ private: std::unique_ptr<SpecialCaseList> Blocklist; }; -// Insert SanitizerCoverage instrumentation. 
-ModulePass *createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()); - } // namespace llvm #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index b3a067ba59c2..fd37130d5459 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -17,7 +17,6 @@ namespace llvm { class Function; -class FunctionPass; class Module; /// A function pass for tsan instrumentation. diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h index edd492b0343d..5f852963c687 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h @@ -521,10 +521,6 @@ FunctionPass *createLoopVersioningPass(); // FunctionPass *createLoopDataPrefetchPass(); -///===---------------------------------------------------------------------===// -ModulePass *createNameAnonGlobalPass(); -ModulePass *createCanonicalizeAliasesPass(); - //===----------------------------------------------------------------------===// // // LibCallsShrinkWrap - Shrink-wraps a call to function if the result is not diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h index ebd4bd318573..5abfb2cceb58 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils.h @@ -50,15 +50,6 @@ extern char &LowerSwitchID; 
//===----------------------------------------------------------------------===// // -// EntryExitInstrumenter pass - Instrument function entry/exit with calls to -// mcount(), @__cyg_profile_func_{enter,exit} and the like. There are two -// variants, intended to run pre- and post-inlining, respectively. -// -FunctionPass *createEntryExitInstrumenterPass(); -FunctionPass *createPostInlineEntryExitInstrumenterPass(); - -//===----------------------------------------------------------------------===// -// // BreakCriticalEdges - Break all of the critical edges in the CFG by inserting // a dummy basic block. This pass may be "required" by passes that cannot deal // with critical edges. For this usage, a pass must call: @@ -115,13 +106,6 @@ ModulePass *createStripNonLineTableDebugLegacyPass(); //===----------------------------------------------------------------------===// // -// ControlHeightReudction - Merges conditional blocks of code and reduces the -// number of conditional branches in the hot paths based on profiles. -// -FunctionPass *createControlHeightReductionLegacyPass(); - -//===----------------------------------------------------------------------===// -// // InjectTLIMappingsLegacy - populates the VFABI attribute with the // scalar-to-vector mappings from the TargetLibraryInfo. 
// diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerAtomic.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerAtomic.h index c85f8e3a5646..43a407d35909 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerAtomic.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LowerAtomic.h @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H -#define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H +#ifndef LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H +#define LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H #include "llvm/IR/Instructions.h" @@ -34,4 +34,4 @@ Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Inc); } -#endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H +#endif // LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h index 064eeac4c669..75eb97646770 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/MisExpect.h @@ -14,6 +14,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_MISEXPECT_H +#define LLVM_TRANSFORMS_UTILS_MISEXPECT_H + #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -75,3 +78,5 @@ void checkExpectAnnotations(Instruction &I, } // namespace misexpect } // namespace llvm + +#endif diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 260ed1a97831..4f878928a7bf 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ 
b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -28,19 +28,6 @@ namespace llvm { extern cl::opt<unsigned> SCEVCheapExpansionBudget; -/// Return true if the given expression is safe to expand in the sense that -/// all materialized values are safe to speculate anywhere their operands are -/// defined, and the expander is capable of expanding the expression. -/// CanonicalMode indicates whether the expander will be used in canonical mode. -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, - bool CanonicalMode = true); - -/// Return true if the given expression is safe to expand in the sense that -/// all materialized values are defined and safe to speculate at the specified -/// location and their operands are defined at this location. -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE); - /// struct for holding enough information to help calculate the cost of the /// given SCEV when expanded into IR. struct SCEVOperand { @@ -270,6 +257,16 @@ public: SmallVectorImpl<WeakTrackingVH> &DeadInsts, const TargetTransformInfo *TTI = nullptr); + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are safe to speculate anywhere their operands are + /// defined, and the expander is capable of expanding the expression. + bool isSafeToExpand(const SCEV *S) const; + + /// Return true if the given expression is safe to expand in the sense that + /// all materialized values are defined and safe to speculate at the specified + /// location and their operands are defined at this location. + bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const; + /// Insert code to directly compute the specified SCEV expression into the /// program. The code is inserted into the specified block. 
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) { diff --git a/contrib/llvm-project/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/contrib/llvm-project/llvm/include/llvm/WindowsDriver/MSVCPaths.h index 7256a4f66eaa..66c93cd55e20 100644 --- a/contrib/llvm-project/llvm/include/llvm/WindowsDriver/MSVCPaths.h +++ b/contrib/llvm-project/llvm/include/llvm/WindowsDriver/MSVCPaths.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_MSVCPATHS_H -#define LLVM_SUPPORT_MSVCPATHS_H +#ifndef LLVM_WINDOWSDRIVER_MSVCPATHS_H +#define LLVM_WINDOWSDRIVER_MSVCPATHS_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" diff --git a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp index 234a73bff6a8..bb25244a88dd 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/AliasSetTracker.cpp @@ -579,7 +579,7 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { AliasAnyAS->Access = AliasSet::ModRefAccess; AliasAnyAS->AliasAny = true; - for (auto Cur : ASVector) { + for (auto *Cur : ASVector) { // If Cur was already forwarding, just forward to the new AS instead. AliasSet *FwdTo = Cur->Forward; if (FwdTo) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp index c78f822b8bcf..c3b032abcba2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1764,7 +1764,7 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, // Make sure that the visited phis cannot reach the Value. This ensures that // the Values cannot come from different iterations of a potential cycle the // phi nodes could be involved in. 
- for (auto *P : VisitedPhiBBs) + for (const auto *P : VisitedPhiBBs) if (isPotentiallyReachable(&P->front(), Inst, nullptr, DT)) return false; diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 428ae8975c30..f45728768fcd 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -1250,7 +1250,7 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (auto BB : post_order(&F.getEntryBlock())) { + for (const auto *BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); // If there is no at least two successors, no sense to set probability. diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp index 1902d72f2f89..e5dd45842d6a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFG.cpp @@ -149,7 +149,7 @@ bool llvm::isPotentiallyReachableFromMany( // untrue. 
SmallPtrSet<const Loop *, 8> LoopsWithHoles; if (LI && ExclusionSet) { - for (auto BB : *ExclusionSet) { + for (auto *BB : *ExclusionSet) { if (const Loop *L = getOutermostLoop(LI, BB)) LoopsWithHoles.insert(L); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp index 602a01867f3b..d70e1b21d768 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp @@ -615,7 +615,7 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList, auto Src = InstantiatedValue{Val, I}; // If there's an assignment edge from X to Y, it means Y is reachable from // X at S3 and X is reachable from Y at S1 - for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { + for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, WorkList); propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm-project/llvm/lib/Analysis/CFLGraph.h index 60fc8d18678c..47bb02ac8e8b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLGraph.h +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLGraph.h @@ -434,7 +434,8 @@ template <typename CFLAA> class CFLGraphBuilder { // introduce any aliases. // TODO: address other common library functions such as realloc(), // strdup(), etc. 
- if (isMallocOrCallocLikeFn(&Call, &TLI) || isFreeCall(&Call, &TLI)) + if (isMallocOrCallocLikeFn(&Call, &TLI) || + getFreedOperand(&Call, &TLI) != nullptr) return; // TODO: Add support for noalias args/all the other fun function diff --git a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp index f92869c2ec63..33ed6f88f82b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp @@ -216,7 +216,7 @@ CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) { for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Src = InstantiatedValue{Val, I}; - for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) + for (const auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) SetBuilder.addWith(Src, Edge.Other); } } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp index dc774728ab3d..2efa474f3552 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstraintSystem.cpp @@ -110,7 +110,7 @@ void ConstraintSystem::dump(ArrayRef<std::string> Names) const { if (Constraints.empty()) return; - for (auto &Row : Constraints) { + for (const auto &Row : Constraints) { SmallVector<std::string, 16> Parts; for (unsigned I = 1, S = Row.size(); I < S; ++I) { if (Row[I] == 0) diff --git a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp index 52e424ae324b..3c162f604cd5 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/CostModel.cpp @@ -25,6 +25,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; static 
cl::opt<TargetTransformInfo::TargetCostKind> CostKind( @@ -39,6 +40,9 @@ static cl::opt<TargetTransformInfo::TargetCostKind> CostKind( clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency"))); +static cl::opt<bool> TypeBasedIntrinsicCost("type-based-intrinsic-cost", + cl::desc("Calculate intrinsics cost based only on argument types"), + cl::init(false)); #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME @@ -103,7 +107,16 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { for (BasicBlock &B : *F) { for (Instruction &Inst : B) { - InstructionCost Cost = TTI->getInstructionCost(&Inst, CostKind); + InstructionCost Cost; + if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) { + auto *II = dyn_cast<IntrinsicInst>(&Inst); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, + InstructionCost::getInvalid(), true); + Cost = TTI->getIntrinsicInstrCost(ICA, CostKind); + } + else { + Cost = TTI->getInstructionCost(&Inst, CostKind); + } if (auto CostVal = Cost.getValue()) OS << "Cost Model: Found an estimated cost of " << *CostVal; else @@ -122,7 +135,16 @@ PreservedAnalyses CostModelPrinterPass::run(Function &F, for (Instruction &Inst : B) { // TODO: Use a pass parameter instead of cl::opt CostKind to determine // which cost kind to print. 
- InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind); + InstructionCost Cost; + if (TypeBasedIntrinsicCost && isa<IntrinsicInst>(&Inst)) { + auto *II = dyn_cast<IntrinsicInst>(&Inst); + IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II, + InstructionCost::getInvalid(), true); + Cost = TTI.getIntrinsicInstrCost(ICA, CostKind); + } + else { + Cost = TTI.getInstructionCost(&Inst, CostKind); + } if (auto CostVal = Cost.getValue()) OS << "Cost Model: Found an estimated cost of " << *CostVal; else diff --git a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp index 998c888dd2d9..da64ef153960 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DDG.cpp @@ -95,7 +95,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { llvm_unreachable("unimplemented type of node"); OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n"); - for (auto &E : N.getEdges()) + for (const auto &E : N.getEdges()) OS.indent(2) << *E; return OS; } @@ -188,7 +188,7 @@ DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) // Put the basic blocks in program order for correct dependence // directions. 
BasicBlockListType BBList; - for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (const auto &SCC : make_range(scc_begin(&F), scc_end(&F))) append_range(BBList, SCC); std::reverse(BBList.begin(), BBList.end()); DDGBuilder(*this, D, BBList).populate(); diff --git a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp index c36e1d922915..7ab91b9eaea4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Delinearization.cpp @@ -133,7 +133,7 @@ struct SCEVCollectAddRecMultiplies { if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { bool HasAddRec = false; SmallVector<const SCEV *, 0> Operands; - for (auto Op : Mul->operands()) { + for (const auto *Op : Mul->operands()) { const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op); if (Unknown && !isa<CallInst>(Unknown->getValue())) { Operands.push_back(Op); diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp index 1a4b09e0cac2..02c40d2640c1 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -220,19 +220,19 @@ void DivergenceAnalysisImpl::analyzeLoopExitDivergence( // phi nodes at the fringes of the dominance region if (!DT.dominates(&LoopHeader, UserBlock)) { // all PHI nodes of UserBlock become divergent - for (auto &Phi : UserBlock->phis()) { + for (const auto &Phi : UserBlock->phis()) { analyzeTemporalDivergence(Phi, OuterDivLoop); } continue; } // Taint outside users of values carried by OuterDivLoop. 
- for (auto &I : *UserBlock) { + for (const auto &I : *UserBlock) { analyzeTemporalDivergence(I, OuterDivLoop); } // visit all blocks in the dominance region - for (auto *SuccBlock : successors(UserBlock)) { + for (const auto *SuccBlock : successors(UserBlock)) { if (!Visited.insert(SuccBlock).second) { continue; } @@ -399,7 +399,7 @@ DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { } for (const BasicBlock &BB : F) { OS << "\n " << BB.getName() << ":\n"; - for (auto &I : BB.instructionsWithoutDebug()) { + for (const auto &I : BB.instructionsWithoutDebug()) { OS << (DI.isDivergent(I) ? "DIVERGENT: " : " "); OS << I << "\n"; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp index db6eae0d962a..e6ef1c78a628 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp @@ -361,7 +361,7 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, if (Call->isDataOperand(&U)) { // Detect calls to free. if (Call->isArgOperand(&U) && - isFreeCall(I, &GetTLI(*Call->getFunction()))) { + getFreedOperand(Call, &GetTLI(*Call->getFunction())) == U) { if (Writers) Writers->insert(Call->getParent()->getParent()); } else { @@ -906,7 +906,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call, // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. 
- for (auto &A : Call->args()) { + for (const auto &A : Call->args()) { SmallVector<const Value*, 4> Objects; getUnderlyingObjects(A, Objects); diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp index e4d706ab045c..a51e974003f6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp @@ -63,20 +63,6 @@ bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); } -bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { - switch (Kind) { - default: - break; - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::FAdd: - case RecurKind::FMul: - case RecurKind::FMulAdd: - return true; - } - return false; -} - /// Determines if Phi may have been type-promoted. If Phi has a single user /// that ANDs the Phi with a type mask, return the user. 
RT is updated to /// account for the narrower bit width represented by the mask, and the AND @@ -1170,7 +1156,7 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { ExpectedUses = 2; auto getNextInstruction = [&](Instruction *Cur) -> Instruction * { - for (auto User : Cur->users()) { + for (auto *User : Cur->users()) { Instruction *UI = cast<Instruction>(User); if (isa<PHINode>(UI)) continue; diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp index 5bde947bd851..830211658353 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/IVUsers.cpp @@ -274,7 +274,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << " "; IVUse.getOperandValToReplace()->printAsOperand(OS, false); OS << " = " << *getReplacementExpr(IVUse); - for (auto PostIncLoop : IVUse.PostIncLoops) { + for (const auto *PostIncLoop : IVUse.PostIncLoops) { OS << " (post-inc with loop "; PostIncLoop->getHeader()->printAsOperand(OS, false); OS << ")"; diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp index 9fee57c54b85..78e7f456ebc6 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp @@ -68,7 +68,7 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction( void InstructionPrecedenceTracking::fill(const BasicBlock *BB) { FirstSpecialInsts.erase(BB); - for (auto &I : *BB) { + for (const auto &I : *BB) { NumInstScanned++; if (isSpecialInstruction(&I)) { FirstSpecialInsts[BB] = &I; @@ -101,7 +101,7 @@ void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const { void InstructionPrecedenceTracking::validateAll() const { // Check that for every known block the cached value is correct. 
- for (auto &It : FirstSpecialInsts) + for (const auto &It : FirstSpecialInsts) validate(It.first); } #endif diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp index d49b20798c82..2fae260e0d8f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1896,7 +1896,7 @@ void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( const BasicBlock *BB, formatted_raw_ostream &OS) { // Find if there are latticevalues defined for arguments of the function. auto *F = BB->getParent(); - for (auto &Arg : F->args()) { + for (const auto &Arg : F->args()) { ValueLatticeElement Result = LVIImpl->getValueInBlock( const_cast<Argument *>(&Arg), const_cast<BasicBlock *>(BB)); if (Result.isUnknown()) @@ -1932,12 +1932,12 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( printResult(ParentBB); // Print the LVI analysis results for the immediate successor blocks, that // are dominated by `ParentBB`. - for (auto *BBSucc : successors(ParentBB)) + for (const auto *BBSucc : successors(ParentBB)) if (DT.dominates(ParentBB, BBSucc)) printResult(BBSucc); // Print LVI in blocks where `I` is used. - for (auto *U : I->users()) + for (const auto *U : I->users()) if (auto *UseI = dyn_cast<Instruction>(U)) if (!isa<PHINode>(UseI) || DT.dominates(ParentBB, UseI->getParent())) printResult(UseI->getParent()); diff --git a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp index 491d44335f22..381d62a37662 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp @@ -393,14 +393,14 @@ void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const { return; // Dumps all divergent values in F, arguments and then instructions. 
- for (auto &Arg : F->args()) { + for (const auto &Arg : F->args()) { OS << (isDivergent(&Arg) ? "DIVERGENT: " : " "); OS << Arg << "\n"; } // Iterate instructions using instructions() to ensure a deterministic order. for (const BasicBlock &BB : *F) { OS << "\n " << BB.getName() << ":\n"; - for (auto &I : BB.instructionsWithoutDebug()) { + for (const auto &I : BB.instructionsWithoutDebug()) { OS << (isDivergent(&I) ? "DIVERGENT: " : " "); OS << I << "\n"; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp index 9cfb91a22b7d..8b0f2a8ed99b 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Lint.cpp @@ -229,7 +229,7 @@ void Lint::visitCallBase(CallBase &I) { if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { AttributeList PAL = I.getAttributes(); unsigned ArgNo = 0; - for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { + for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) { // Skip ByVal arguments since they will be memcpy'd to the callee's // stack so we're not really passing the pointer anyway. 
if (PAL.hasParamAttr(ArgNo, Attribute::ByVal)) diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 79161db9b5e4..bed684b7652a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -130,6 +130,11 @@ static cl::opt<bool> EnableForwardingConflictDetection( cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true)); +static cl::opt<unsigned> MaxForkedSCEVDepth( + "max-forked-scev-depth", cl::Hidden, + cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), + cl::init(5)); + bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -288,8 +293,10 @@ void RuntimePointerChecking::tryToCreateDiffCheck( DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr); Type *SrcTy = getLoadStoreType(SrcInsts[0]); Type *DstTy = getLoadStoreType(SinkInsts[0]); - if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) + if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) { + CanUseDiffCheck = false; return; + } unsigned AllocSize = std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy)); IntegerType *IntTy = @@ -778,6 +785,140 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, } } +// Walk back through the IR for a pointer, looking for a select like the +// following: +// +// %offset = select i1 %cmp, i64 %a, i64 %b +// %addr = getelementptr double, double* %base, i64 %offset +// %ld = load double, double* %addr, align 8 +// +// We won't be able to form a single SCEVAddRecExpr from this since the +// address for each loop iteration depends on %cmp. We could potentially +// produce multiple valid SCEVAddRecExprs, though, and check all of them for +// memory safety/aliasing if needed. 
+// +// If we encounter some IR we don't yet handle, or something obviously fine +// like a constant, then we just add the SCEV for that term to the list passed +// in by the caller. If we have a node that may potentially yield a valid +// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms +// ourselves before adding to the list. +static void +findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, + SmallVectorImpl<std::pair<const SCEV *, bool>> &ScevList, + unsigned Depth) { + // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or + // we've exceeded our limit on recursion, just return whatever we have + // regardless of whether it can be used for a forked pointer or not, along + // with an indication of whether it might be a poison or undef value. + const SCEV *Scev = SE->getSCEV(Ptr); + if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) || + !isa<Instruction>(Ptr) || Depth == 0) { + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + return; + } + + Depth--; + + auto UndefPoisonCheck = [](std::pair<const SCEV *, bool> S) -> bool { + return S.second; + }; + + Instruction *I = cast<Instruction>(Ptr); + unsigned Opcode = I->getOpcode(); + switch (Opcode) { + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); + Type *SourceTy = GEP->getSourceElementType(); + // We only handle base + single offset GEPs here for now. + // Not dealing with preexisting gathers yet, so no vectors. + if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) { + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP))); + break; + } + SmallVector<std::pair<const SCEV *, bool>, 2> BaseScevs; + SmallVector<std::pair<const SCEV *, bool>, 2> OffsetScevs; + findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth); + findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth); + + // See if we need to freeze our fork... 
+ bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) || + any_of(OffsetScevs, UndefPoisonCheck); + + // Check that we only have a single fork, on either the base or the offset. + // Copy the SCEV across for the one without a fork in order to generate + // the full SCEV for both sides of the GEP. + if (OffsetScevs.size() == 2 && BaseScevs.size() == 1) + BaseScevs.push_back(BaseScevs[0]); + else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1) + OffsetScevs.push_back(OffsetScevs[0]); + else { + ScevList.push_back(std::make_pair(Scev, NeedsFreeze)); + break; + } + + // Find the pointer type we need to extend to. + Type *IntPtrTy = SE->getEffectiveSCEVType( + SE->getSCEV(GEP->getPointerOperand())->getType()); + + // Find the size of the type being pointed to. We only have a single + // index term (guarded above) so we don't need to index into arrays or + // structures, just get the size of the scalar value. + const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy); + + // Scale up the offsets by the size of the type, then add to the bases. + const SCEV *Scaled1 = SE->getMulExpr( + Size, SE->getTruncateOrSignExtend(OffsetScevs[0].first, IntPtrTy)); + const SCEV *Scaled2 = SE->getMulExpr( + Size, SE->getTruncateOrSignExtend(OffsetScevs[1].first, IntPtrTy)); + ScevList.push_back(std::make_pair( + SE->getAddExpr(BaseScevs[0].first, Scaled1), NeedsFreeze)); + ScevList.push_back(std::make_pair( + SE->getAddExpr(BaseScevs[1].first, Scaled2), NeedsFreeze)); + break; + } + case Instruction::Select: { + SmallVector<std::pair<const SCEV *, bool>, 2> ChildScevs; + // A select means we've found a forked pointer, but we currently only + // support a single select per pointer so if there's another behind this + // then we just bail out and return the generic SCEV. 
+ findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); + findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth); + if (ChildScevs.size() == 2) { + ScevList.push_back(ChildScevs[0]); + ScevList.push_back(ChildScevs[1]); + } else + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + break; + } + default: + // Just return the current SCEV if we haven't handled the instruction yet. + LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n"); + ScevList.push_back( + std::make_pair(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr))); + break; + } +} + +static SmallVector<std::pair<const SCEV *, bool>> +findForkedPointer(PredicatedScalarEvolution &PSE, + const ValueToValueMap &StridesMap, Value *Ptr, + const Loop *L) { + ScalarEvolution *SE = PSE.getSE(); + assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!"); + SmallVector<std::pair<const SCEV *, bool>> Scevs; + findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth); + + // For now, we will only accept a forked pointer with two possible SCEVs. + if (Scevs.size() == 2) + return Scevs; + + return { + std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)}; +} + bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy, const ValueToValueMap &StridesMap, @@ -787,19 +928,8 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, bool Assume) { Value *Ptr = Access.getPointer(); - ScalarEvolution &SE = *PSE.getSE(); - SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs; - auto *SI = dyn_cast<SelectInst>(Ptr); - // Look through selects in the current loop. 
- if (SI && !TheLoop->isLoopInvariant(SI)) { - TranslatedPtrs = { - std::make_pair(SE.getSCEV(SI->getOperand(1)), - !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(1))), - std::make_pair(SE.getSCEV(SI->getOperand(2)), - !isGuaranteedNotToBeUndefOrPoison(SI->getOperand(2)))}; - } else - TranslatedPtrs = { - std::make_pair(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false)}; + SmallVector<std::pair<const SCEV *, bool>> TranslatedPtrs = + findForkedPointer(PSE, StridesMap, Ptr, TheLoop); for (auto &P : TranslatedPtrs) { const SCEV *PtrExpr = P.first; @@ -879,7 +1009,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, unsigned RunningDepId = 1; DenseMap<Value *, unsigned> DepSetId; - SmallVector<MemAccessInfo, 4> Retries; + SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries; // First, count how many write and read accesses are in the alias set. Also // collect MemAccessInfos for later. @@ -911,13 +1041,13 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, } for (auto &Access : AccessInfos) { - for (auto &AccessTy : Accesses[Access]) { + for (const auto &AccessTy : Accesses[Access]) { if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'); - Retries.push_back(Access); + Retries.push_back({Access, AccessTy}); CanDoAliasSetRT = false; } } @@ -941,15 +1071,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // We know that we need these checks, so we can now be more aggressive // and add further checks if required (overflow checks). 
CanDoAliasSetRT = true; - for (auto Access : Retries) { - for (auto &AccessTy : Accesses[Access]) { - if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, - DepSetId, TheLoop, RunningDepId, ASId, - ShouldCheckWrap, /*Assume=*/true)) { - CanDoAliasSetRT = false; - UncomputablePtr = Access.getPointer(); - break; - } + for (auto Retry : Retries) { + MemAccessInfo Access = Retry.first; + Type *AccessTy = Retry.second; + if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, + DepSetId, TheLoop, RunningDepId, ASId, + ShouldCheckWrap, /*Assume=*/true)) { + CanDoAliasSetRT = false; + UncomputablePtr = Access.getPointer(); + break; } } } @@ -2461,7 +2591,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (auto *Dependences = DepChecker->getDependences()) { OS.indent(Depth) << "Dependences:\n"; - for (auto &Dep : *Dependences) { + for (const auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); OS << "\n"; } diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp index 2cbf1f7f2d28..85f2dad86711 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -289,18 +289,14 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L, LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); const SCEV *RefCost = nullptr; - if (isConsecutive(L, CLS)) { + const SCEV *Stride = nullptr; + if (isConsecutive(L, Stride, CLS)) { // If the indexed reference is 'consecutive' the cost is // (TripCount*Stride)/CLS. 
- const SCEV *Coeff = getLastCoefficient(); - const SCEV *ElemSize = Sizes.back(); - assert(Coeff->getType() == ElemSize->getType() && - "Expecting the same type"); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + assert(Stride != nullptr && + "Stride should not be null for consecutive access!"); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); - if (SE.isKnownNegative(Stride)) - Stride = SE.getNegativeSCEV(Stride); Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); @@ -464,7 +460,8 @@ bool IndexedReference::isLoopInvariant(const Loop &L) const { return allCoeffForLoopAreZero; } -bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { +bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride, + unsigned CLS) const { // The indexed reference is 'consecutive' if the only coefficient that uses // the loop induction variable is the last one... const SCEV *LastSubscript = Subscripts.back(); @@ -478,7 +475,19 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { // ...and the access stride is less than the cache line size. const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType()); + // FIXME: This assumes that all values are signed integers which may + // be incorrect in unusual codes and incorrectly use sext instead of zext. + // for (uint32_t i = 0; i < 512; ++i) { + // uint8_t trunc = i; + // A[trunc] = 42; + // } + // This consecutively iterates twice over A. If `trunc` is sign-extended, + // we would conclude that this may iterate backwards over the array. 
+ // However, LoopCacheAnalysis is heuristic anyway and transformations must + // not result in wrong optimizations if the heuristic was incorrect. + Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType), + SE.getNoopOrSignExtend(ElemSize, WiderType)); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp index 29c2437ff5ea..693b9ebd450a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/LoopInfo.cpp @@ -425,12 +425,12 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, - const DominatorTree &DT) { + const DominatorTree &DT, bool IgnoreTokens) { for (const Instruction &I : BB) { // Tokens can't be used in PHI nodes and live-out tokens prevent loop // optimizations, so for the purposes of considered LCSSA form, we // can ignore them. - if (I.getType()->isTokenTy()) + if (IgnoreTokens && I.getType()->isTokenTy()) continue; for (const Use &U : I.uses()) { @@ -455,20 +455,20 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, return true; } -bool Loop::isLCSSAForm(const DominatorTree &DT) const { +bool Loop::isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens) const { // For each block we check that it doesn't have any uses outside of this loop. 
return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*this, *BB, DT); + return isBlockInLCSSAForm(*this, *BB, DT, IgnoreTokens); }); } -bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, - const LoopInfo &LI) const { +bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, + bool IgnoreTokens) const { // For each block we check that it doesn't have any uses outside of its // innermost loop. This process will transitively guarantee that the current // loop and all of the nested loops are in LCSSA form. return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT); + return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, IgnoreTokens); }); } @@ -482,11 +482,8 @@ bool Loop::isLoopSimplifyForm() const { bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. - // FIXME: it should be ok to clone CallBrInst's if we correctly update the - // operand list to reflect the newly cloned labels. for (BasicBlock *BB : this->blocks()) { - if (isa<IndirectBrInst>(BB->getTerminator()) || - isa<CallBrInst>(BB->getTerminator())) + if (isa<IndirectBrInst>(BB->getTerminator())) return false; for (Instruction &I : *BB) diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp index f5b121c98ec4..31e4380e4379 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -179,6 +179,11 @@ static const Function *getCalledFunction(const Value *V, static Optional<AllocFnsTy> getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, const TargetLibraryInfo *TLI) { + // Don't perform a slow TLI lookup, if this function doesn't return a pointer + // and thus can't be an allocation function. 
+ if (!Callee->getReturnType()->isPointerTy()) + return None; + // Make sure that the function is available. LibFunc TLIFn; if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) @@ -266,15 +271,42 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V, return Result; } +static AllocFnKind getAllocFnKind(const Value *V) { + if (const auto *CB = dyn_cast<CallBase>(V)) { + Attribute Attr = CB->getFnAttr(Attribute::AllocKind); + if (Attr.isValid()) + return AllocFnKind(Attr.getValueAsInt()); + } + return AllocFnKind::Unknown; +} + +static AllocFnKind getAllocFnKind(const Function *F) { + Attribute Attr = F->getFnAttribute(Attribute::AllocKind); + if (Attr.isValid()) + return AllocFnKind(Attr.getValueAsInt()); + return AllocFnKind::Unknown; +} + +static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) { + return (getAllocFnKind(V) & Wanted) != AllocFnKind::Unknown; +} + +static bool checkFnAllocKind(const Function *F, AllocFnKind Wanted) { + return (getAllocFnKind(F) & Wanted) != AllocFnKind::Unknown; +} + /// Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). 
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AnyAlloc, TLI).has_value(); + return getAllocationData(V, AnyAlloc, TLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc); } bool llvm::isAllocationFn( - const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { - return getAllocationData(V, AnyAlloc, GetTLI).has_value(); + const Value *V, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return getAllocationData(V, AnyAlloc, GetTLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc); } /// Tests if a value is a call or invoke to a library function that @@ -304,30 +336,36 @@ bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) /// Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, AllocLike, TLI).has_value(); -} - -/// Tests if a value is a call or invoke to a library function that -/// reallocates memory (e.g., realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { - return getAllocationData(V, ReallocLike, TLI).has_value(); + return getAllocationData(V, AllocLike, TLI).has_value() || + checkFnAllocKind(V, AllocFnKind::Alloc); } /// Tests if a functions is a call or invoke to a library function that /// reallocates memory (e.g., realloc). 
bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) { - return getAllocationDataForFunction(F, ReallocLike, TLI).has_value(); + return getAllocationDataForFunction(F, ReallocLike, TLI).has_value() || + checkFnAllocKind(F, AllocFnKind::Realloc); } -bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) { - assert(isAllocationFn(CB, TLI)); +Value *llvm::getReallocatedOperand(const CallBase *CB, + const TargetLibraryInfo *TLI) { + if (getAllocationData(CB, ReallocLike, TLI).has_value()) { + // All currently supported realloc functions reallocate the first argument. + return CB->getArgOperand(0); + } + if (checkFnAllocKind(CB, AllocFnKind::Realloc)) + return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer); + return nullptr; +} +bool llvm::isRemovableAlloc(const CallBase *CB, const TargetLibraryInfo *TLI) { // Note: Removability is highly dependent on the source language. For // example, recent C++ requires direct calls to the global allocation // [basic.stc.dynamic.allocation] to be observable unless part of a new // expression [expr.new paragraph 13]. - // Historically we've treated the C family allocation routines as removable + // Historically we've treated the C family allocation routines and operator + // new as removable return isAllocLikeFn(CB, TLI); } @@ -357,9 +395,8 @@ static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) { } Optional<APInt> -llvm::getAllocSize(const CallBase *CB, - const TargetLibraryInfo *TLI, - std::function<const Value*(const Value*)> Mapper) { +llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, + function_ref<const Value *(const Value *)> Mapper) { // Note: This handles both explicitly listed allocation functions and // allocsize. The code structure could stand to be cleaned up a bit. 
Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI); @@ -434,6 +471,12 @@ Constant *llvm::getInitialValueOfAllocation(const Value *V, if (isCallocLikeFn(Alloc, TLI)) return Constant::getNullValue(Ty); + AllocFnKind AK = getAllocFnKind(Alloc); + if ((AK & AllocFnKind::Uninitialized) != AllocFnKind::Unknown) + return UndefValue::get(Ty); + if ((AK & AllocFnKind::Zeroed) != AllocFnKind::Unknown) + return Constant::getNullValue(Ty); + return nullptr; } @@ -497,14 +540,23 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I, if (Callee == nullptr || IsNoBuiltin) return None; LibFunc TLIFn; - if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) - return None; - const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); - if (AllocData) - return mangledNameForMallocFamily(AllocData.value().Family); - const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); - if (FreeData) - return mangledNameForMallocFamily(FreeData.value().Family); + + if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn)) { + // Callee is some known library function. + const auto AllocData = getAllocationDataForFunction(Callee, AnyAlloc, TLI); + if (AllocData) + return mangledNameForMallocFamily(AllocData.value().Family); + const auto FreeData = getFreeFunctionDataForFunction(Callee, TLIFn); + if (FreeData) + return mangledNameForMallocFamily(FreeData.value().Family); + } + // Callee isn't a known library function, still check attributes. 
+ if (checkFnAllocKind(I, AllocFnKind::Free | AllocFnKind::Alloc | + AllocFnKind::Realloc)) { + Attribute Attr = cast<CallBase>(I)->getFnAttr("alloc-family"); + if (Attr.isValid()) + return Attr.getValueAsString(); + } return None; } @@ -512,7 +564,7 @@ Optional<StringRef> llvm::getAllocationFamily(const Value *I, bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { Optional<FreeFnsTy> FnData = getFreeFunctionDataForFunction(F, TLIFn); if (!FnData) - return false; + return checkFnAllocKind(F, AllocFnKind::Free); // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin @@ -528,20 +580,24 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { return true; } -/// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { +Value *llvm::getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI) { bool IsNoBuiltinCall; - const Function *Callee = getCalledFunction(I, IsNoBuiltinCall); + const Function *Callee = getCalledFunction(CB, IsNoBuiltinCall); if (Callee == nullptr || IsNoBuiltinCall) return nullptr; LibFunc TLIFn; - if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) - return nullptr; + if (TLI && TLI->getLibFunc(*Callee, TLIFn) && TLI->has(TLIFn) && + isLibFreeFunction(Callee, TLIFn)) { + // All currently supported free functions free the first argument. + return CB->getArgOperand(0); + } - return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr; -} + if (checkFnAllocKind(CB, AllocFnKind::Free)) + return CB->getArgOperandWithAttribute(Attribute::AllocatedPointer); + return nullptr; +} //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. 
@@ -765,8 +821,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { } SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) { - auto Mapper = [](const Value *V) { return V; }; - if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper)) + if (Optional<APInt> Size = getAllocSize(&CB, TLI)) return std::make_pair(*Size, Zero); return unknown(); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 690d575ef979..fce9d5b24faf 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -139,10 +139,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, return ModRefInfo::ModRef; } - if (const CallInst *CI = isFreeCall(Inst, &TLI)) { - // calls to free() deallocate the entire structure - Loc = MemoryLocation::getAfter(CI->getArgOperand(0)); - return ModRefInfo::Mod; + if (const CallBase *CB = dyn_cast<CallBase>(Inst)) { + if (Value *FreedOp = getFreedOperand(CB, &TLI)) { + // calls to free() deallocate the entire structure + Loc = MemoryLocation::getAfter(FreedOp); + return ModRefInfo::Mod; + } } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp new file mode 100644 index 000000000000..3d11cb81226e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryProfileInfo.cpp @@ -0,0 +1,226 @@ +//===-- MemoryProfileInfo.cpp - memory profile info ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains utilities to analyze memory profile information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::memprof; + +#define DEBUG_TYPE "memory-profile-info" + +// Upper bound on accesses per byte for marking an allocation cold. +cl::opt<float> MemProfAccessesPerByteColdThreshold( + "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden, + cl::desc("The threshold the accesses per byte must be under to consider " + "an allocation cold")); + +// Lower bound on lifetime to mark an allocation cold (in addition to accesses +// per byte above). This is to avoid pessimizing short lived objects. +cl::opt<unsigned> MemProfMinLifetimeColdThreshold( + "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden, + cl::desc("The minimum lifetime (s) for an allocation to be considered " + "cold")); + +AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount, + uint64_t MinSize, + uint64_t MinLifetime) { + if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold && + // MinLifetime is expected to be in ms, so convert the threshold to ms. 
+ MinLifetime >= MemProfMinLifetimeColdThreshold * 1000) + return AllocationType::Cold; + return AllocationType::NotCold; +} + +MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack, + LLVMContext &Ctx) { + std::vector<Metadata *> StackVals; + for (auto Id : CallStack) { + auto *StackValMD = + ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id)); + StackVals.push_back(StackValMD); + } + return MDNode::get(Ctx, StackVals); +} + +MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) { + assert(MIB->getNumOperands() == 2); + // The stack metadata is the first operand of each memprof MIB metadata. + return cast<MDNode>(MIB->getOperand(0)); +} + +AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) { + assert(MIB->getNumOperands() == 2); + // The allocation type is currently the second operand of each memprof + // MIB metadata. This will need to change as we add additional allocation + // types that can be applied based on the allocation profile data. 
+ auto *MDS = dyn_cast<MDString>(MIB->getOperand(1)); + assert(MDS); + if (MDS->getString().equals("cold")) + return AllocationType::Cold; + return AllocationType::NotCold; +} + +static std::string getAllocTypeAttributeString(AllocationType Type) { + switch (Type) { + case AllocationType::NotCold: + return "notcold"; + break; + case AllocationType::Cold: + return "cold"; + break; + default: + assert(false && "Unexpected alloc type"); + } + llvm_unreachable("invalid alloc type"); +} + +static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI, + AllocationType AllocType) { + auto AllocTypeString = getAllocTypeAttributeString(AllocType); + auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString); + CI->addFnAttr(A); +} + +static bool hasSingleAllocType(uint8_t AllocTypes) { + const unsigned NumAllocTypes = countPopulation(AllocTypes); + assert(NumAllocTypes != 0); + return NumAllocTypes == 1; +} + +void CallStackTrie::addCallStack(AllocationType AllocType, + ArrayRef<uint64_t> StackIds) { + bool First = true; + CallStackTrieNode *Curr = nullptr; + for (auto StackId : StackIds) { + // If this is the first stack frame, add or update alloc node. + if (First) { + First = false; + if (Alloc) { + assert(AllocStackId == StackId); + Alloc->AllocTypes |= static_cast<uint8_t>(AllocType); + } else { + AllocStackId = StackId; + Alloc = new CallStackTrieNode(AllocType); + } + Curr = Alloc; + continue; + } + // Update existing caller node if it exists. + auto Next = Curr->Callers.find(StackId); + if (Next != Curr->Callers.end()) { + Curr = Next->second; + Curr->AllocTypes |= static_cast<uint8_t>(AllocType); + continue; + } + // Otherwise add a new caller node. 
+ auto *New = new CallStackTrieNode(AllocType); + Curr->Callers[StackId] = New; + Curr = New; + } + assert(Curr); +} + +void CallStackTrie::addCallStack(MDNode *MIB) { + MDNode *StackMD = getMIBStackNode(MIB); + assert(StackMD); + std::vector<uint64_t> CallStack; + CallStack.reserve(StackMD->getNumOperands()); + for (auto &MIBStackIter : StackMD->operands()) { + auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter); + assert(StackId); + CallStack.push_back(StackId->getZExtValue()); + } + addCallStack(getMIBAllocType(MIB), CallStack); +} + +static MDNode *createMIBNode(LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + AllocationType AllocType) { + std::vector<Metadata *> MIBPayload( + {buildCallstackMetadata(MIBCallStack, Ctx)}); + MIBPayload.push_back( + MDString::get(Ctx, getAllocTypeAttributeString(AllocType))); + return MDNode::get(Ctx, MIBPayload); +} + +// Recursive helper to trim contexts and create metadata nodes. +// Caller should have pushed Node's loc to MIBCallStack. Doing this in the +// caller makes it simpler to handle the many early returns in this method. +bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx, + std::vector<uint64_t> &MIBCallStack, + std::vector<Metadata *> &MIBNodes, + bool CalleeHasAmbiguousCallerContext) { + // Trim context below the first node in a prefix with a single alloc type. + // Add an MIB record for the current call stack prefix. + if (hasSingleAllocType(Node->AllocTypes)) { + MIBNodes.push_back( + createMIBNode(Ctx, MIBCallStack, (AllocationType)Node->AllocTypes)); + return true; + } + + // We don't have a single allocation for all the contexts sharing this prefix, + // so recursively descend into callers in trie. 
+ if (!Node->Callers.empty()) { + bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1; + bool AddedMIBNodesForAllCallerContexts = true; + for (auto &Caller : Node->Callers) { + MIBCallStack.push_back(Caller.first); + AddedMIBNodesForAllCallerContexts &= + buildMIBNodes(Caller.second, Ctx, MIBCallStack, MIBNodes, + NodeHasAmbiguousCallerContext); + // Remove Caller. + MIBCallStack.pop_back(); + } + if (AddedMIBNodesForAllCallerContexts) + return true; + // We expect that the callers should be forced to add MIBs to disambiguate + // the context in this case (see below). + assert(!NodeHasAmbiguousCallerContext); + } + + // If we reached here, then this node does not have a single allocation type, + // and we didn't add metadata for a longer call stack prefix including any of + // Node's callers. That means we never hit a single allocation type along all + // call stacks with this prefix. This can happen due to recursion collapsing + // or the stack being deeper than tracked by the profiler runtime, leading to + // contexts with different allocation types being merged. In that case, we + // trim the context just below the deepest context split, which is this + // node if the callee has an ambiguous caller context (multiple callers), + // since the recursive calls above returned false. Conservatively give it + // non-cold allocation type. + if (!CalleeHasAmbiguousCallerContext) + return false; + MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold)); + return true; +} + +// Build and attach the minimal necessary MIB metadata. If the alloc has a +// single allocation type, add a function attribute instead. Returns true if +// memprof metadata attached, false if not (attribute added). 
+bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) { + auto &Ctx = CI->getContext(); + if (hasSingleAllocType(Alloc->AllocTypes)) { + addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes); + return false; + } + std::vector<uint64_t> MIBCallStack; + MIBCallStack.push_back(AllocStackId); + std::vector<Metadata *> MIBNodes; + assert(!Alloc->Callers.empty() && "addCallStack has not been called yet"); + buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes, + /*CalleeHasAmbiguousCallerContext=*/true); + assert(MIBCallStack.size() == 1 && + "Should only be left with Alloc's location in stack"); + CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes)); + return true; +} diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp index eb75118210b9..9ad60f774e9f 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -451,7 +451,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { } // Existing Phi blocks may need renaming too, if an access was previously // optimized and the inserted Defs "covers" the Optimized value. 
- for (auto &MP : ExistingPhis) { + for (const auto &MP : ExistingPhis) { MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MP); if (Phi) MSSA->renamePass(Phi->getBlock(), nullptr, Visited); @@ -462,7 +462,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) { SmallPtrSet<const BasicBlock *, 8> Seen; SmallVector<const BasicBlock *, 16> Worklist; - for (auto &Var : Vars) { + for (const auto &Var : Vars) { MemoryAccess *NewDef = dyn_cast_or_null<MemoryAccess>(Var); if (!NewDef) continue; @@ -744,10 +744,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks, cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap); }; - for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) + for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) ProcessBlock(BB); - for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) + for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks)) if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi)) FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi)); @@ -811,7 +811,7 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates, SmallVector<CFGUpdate, 4> DeleteUpdates; SmallVector<CFGUpdate, 4> RevDeleteUpdates; SmallVector<CFGUpdate, 4> InsertUpdates; - for (auto &Update : Updates) { + for (const auto &Update : Updates) { if (Update.getKind() == DT.Insert) InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()}); else { @@ -958,7 +958,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, }; SmallDenseMap<BasicBlock *, PredInfo> PredMap; - for (auto &Edge : Updates) { + for (const auto &Edge : Updates) { BasicBlock *BB = Edge.getTo(); auto &AddedBlockSet = PredMap[BB].Added; AddedBlockSet.insert(Edge.getFrom()); @@ -1003,7 +1003,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates, 
// First create MemoryPhis in all blocks that don't have one. Create in the // order found in Updates, not in PredMap, to get deterministic numbering. - for (auto &Edge : Updates) { + for (const auto &Edge : Updates) { BasicBlock *BB = Edge.getTo(); if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB)) InsertedPhis.push_back(MSSA->createMemoryPhi(BB)); @@ -1400,7 +1400,7 @@ void MemorySSAUpdater::removeBlocks( } void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) { - for (auto &VH : UpdatedPHIs) + for (const auto &VH : UpdatedPHIs) if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) tryRemoveTrivialPhi(MPhi); } diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index 373aaa48b1d1..756f92e1aac4 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -95,7 +95,7 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M, O << '\n'; } - for (auto GVU : Finder.global_variables()) { + for (auto *GVU : Finder.global_variables()) { const auto *GV = GVU->getVariable(); O << "Global variable: " << GV->getName(); printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); diff --git a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 2b98634ef7bf..c52b27a38fe9 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -400,7 +400,7 @@ static void computeFunctionSummary( // to enable importing for subsequent indirect call promotion and // inlining. 
if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) { - for (auto &Op : MD->operands()) { + for (const auto &Op : MD->operands()) { Function *Callee = mdconst::extract_or_null<Function>(Op); if (Callee) CallGraphEdges[Index.getOrInsertValueInfo(Callee)]; @@ -412,7 +412,7 @@ static void computeFunctionSummary( auto CandidateProfileData = ICallAnalysis.getPromotionCandidatesForInstruction( &I, NumVals, TotalCount, NumCandidates); - for (auto &Candidate : CandidateProfileData) + for (const auto &Candidate : CandidateProfileData) CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)] .updateHotness(getHotness(Candidate.Count, PSI)); } @@ -451,7 +451,7 @@ static void computeFunctionSummary( // If both load and store instruction reference the same variable // we won't be able to optimize it. Add all such reference edges // to RefEdges set. - for (auto &VI : StoreRefEdges) + for (const auto &VI : StoreRefEdges) if (LoadRefEdges.remove(VI)) RefEdges.insert(VI); @@ -459,11 +459,11 @@ static void computeFunctionSummary( // All new reference edges inserted in two loops below are either // read or write only. They will be grouped in the end of RefEdges // vector, so we can use a single integer value to identify them. - for (auto &VI : LoadRefEdges) + for (const auto &VI : LoadRefEdges) RefEdges.insert(VI); unsigned FirstWORef = RefEdges.size(); - for (auto &VI : StoreRefEdges) + for (const auto &VI : StoreRefEdges) RefEdges.insert(VI); Refs = RefEdges.takeVector(); @@ -646,15 +646,18 @@ static void computeVariableSummary(ModuleSummaryIndex &Index, Index.addGlobalValueSummary(V, std::move(GVarSummary)); } -static void -computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, - DenseSet<GlobalValue::GUID> &CantBePromoted) { +static void computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, + DenseSet<GlobalValue::GUID> &CantBePromoted) { + // Skip summary for indirect function aliases as summary for aliasee will not + // be emitted. 
+ const GlobalObject *Aliasee = A.getAliaseeObject(); + if (isa<GlobalIFunc>(Aliasee)) + return; bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags( A.getLinkage(), A.getVisibility(), NonRenamableLocal, /* Live = */ false, A.isDSOLocal(), A.canBeOmittedFromSymbolTable()); auto AS = std::make_unique<AliasSummary>(Flags); - auto *Aliasee = A.getAliaseeObject(); auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID()); assert(AliaseeVI && "Alias expects aliasee summary to be available"); assert(AliaseeVI.getSummaryList().size() == 1 && @@ -668,7 +671,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, // Set LiveRoot flag on entries matching the given value name. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name))) - for (auto &Summary : VI.getSummaryList()) + for (const auto &Summary : VI.getSummaryList()) Summary->setLive(true); } @@ -776,7 +779,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( // Compute summaries for all functions defined in module, and save in the // index. - for (auto &F : M) { + for (const auto &F : M) { if (F.isDeclaration()) continue; @@ -811,6 +814,13 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( for (const GlobalAlias &A : M.aliases()) computeAliasSummary(Index, A, CantBePromoted); + // Iterate through ifuncs, set their resolvers all alive. 
+ for (const GlobalIFunc &I : M.ifuncs()) { + I.applyAlongResolverPath([&Index](const GlobalValue &GV) { + Index.getGlobalValueSummary(GV)->setLive(true); + }); + } + for (auto *V : LocalsUsed) { auto *Summary = Index.getGlobalValueSummary(*V); assert(Summary && "Missing summary for global value"); diff --git a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp index ad8322d7bd79..ac6590c1d8a2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/MustExecute.cpp @@ -81,7 +81,7 @@ void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) { MW.clear(); MayThrow = false; // Figure out the fact that at least one block may throw. - for (auto &BB : CurLoop->blocks()) + for (const auto &BB : CurLoop->blocks()) if (ICF.hasICF(&*BB)) { MayThrow = true; break; @@ -164,7 +164,7 @@ static void collectTransitivePredecessors( if (BB == CurLoop->getHeader()) return; SmallVector<const BasicBlock *, 4> WorkList; - for (auto *Pred : predecessors(BB)) { + for (const auto *Pred : predecessors(BB)) { Predecessors.insert(Pred); WorkList.push_back(Pred); } @@ -180,7 +180,7 @@ static void collectTransitivePredecessors( // @nested and @nested_no_throw in test/Analysis/MustExecute/loop-header.ll. // We can ignore backedge of all loops containing BB to get a sligtly more // optimistic result. - for (auto *PredPred : predecessors(Pred)) + for (const auto *PredPred : predecessors(Pred)) if (Predecessors.insert(PredPred).second) WorkList.push_back(PredPred); } @@ -207,7 +207,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, // 3) Exit blocks which are not taken on 1st iteration. // Memoize blocks we've already checked. SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors; - for (auto *Pred : Predecessors) { + for (const auto *Pred : Predecessors) { // Predecessor block may throw, so it has a side exit. 
if (blockMayThrow(Pred)) return false; @@ -217,7 +217,7 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, if (DT->dominates(BB, Pred)) continue; - for (auto *Succ : successors(Pred)) + for (const auto *Succ : successors(Pred)) if (CheckedSuccessors.insert(Succ).second && Succ != BB && !Predecessors.count(Succ)) // By discharging conditions that are not executed on the 1st iteration, @@ -285,7 +285,7 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB, collectTransitivePredecessors(CurLoop, BB, Predecessors); // Find if there any instruction in either predecessor that could write // to memory. - for (auto *Pred : Predecessors) + for (const auto *Pred : Predecessors) if (MW.mayWriteToMemory(Pred)) return false; return true; @@ -413,7 +413,7 @@ class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter { public: MustExecuteAnnotatedWriter(const Function &F, DominatorTree &DT, LoopInfo &LI) { - for (auto &I: instructions(F)) { + for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { if (isMustExecuteIn(I, L, &DT)) { @@ -425,8 +425,8 @@ public: } MustExecuteAnnotatedWriter(const Module &M, DominatorTree &DT, LoopInfo &LI) { - for (auto &F : M) - for (auto &I: instructions(F)) { + for (const auto &F : M) + for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { if (isMustExecuteIn(I, L, &DT)) { diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp index f61806bd1dad..d46248aa3889 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1158,7 +1158,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { SmallVector<const SCEV *, 2> Operands; bool Changed = false; - for (auto *Op : Expr->operands()) { + for (const auto *Op : 
Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } @@ -1168,7 +1168,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { SmallVector<const SCEV *, 2> Operands; bool Changed = false; - for (auto *Op : Expr->operands()) { + for (const auto *Op : Expr->operands()) { Operands.push_back(visit(Op)); Changed |= Op != Operands.back(); } @@ -4662,7 +4662,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops, // Find the max type first. Type *MaxType = nullptr; - for (auto *S : Ops) + for (const auto *S : Ops) if (MaxType) MaxType = getWiderType(MaxType, S->getType()); else @@ -4671,7 +4671,7 @@ ScalarEvolution::getUMinFromMismatchedTypes(SmallVectorImpl<const SCEV *> &Ops, // Extend all ops to max type. SmallVector<const SCEV *, 2> PromotedOps; - for (auto *S : Ops) + for (const auto *S : Ops) PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType)); // Generate umin. @@ -6636,7 +6636,7 @@ ScalarEvolution::getRangeRef(const SCEV *S, // Make sure that we do not run over cycled Phis. if (PendingPhiRanges.insert(Phi).second) { ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); - for (auto &Op : Phi->operands()) { + for (const auto &Op : Phi->operands()) { auto OpRange = getRangeRef(getSCEV(Op), SignHint); RangeFromOps = RangeFromOps.unionWith(OpRange); // No point to continue if we already have a full set. 
@@ -6651,6 +6651,13 @@ ScalarEvolution::getRangeRef(const SCEV *S, } } + // vscale can't be equal to zero + if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue())) + if (II->getIntrinsicID() == Intrinsic::vscale) { + ConstantRange Disallowed = APInt::getZero(BitWidth); + ConservativeResult = ConservativeResult.difference(Disallowed); + } + return setRange(U, SignHint, std::move(ConservativeResult)); } @@ -6973,13 +6980,13 @@ static void collectUniqueOps(const SCEV *S, Ops.push_back(S); }; if (auto *S2 = dyn_cast<SCEVCastExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S)) - for (auto *Op : S2->operands()) + for (const auto *Op : S2->operands()) InsertUnique(Op); } @@ -7001,7 +7008,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops, Worklist.push_back(S); }; - for (auto *S : Ops) + for (const auto *S : Ops) pushOp(S); const Instruction *Bound = nullptr; @@ -7013,7 +7020,7 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops, } else { SmallVector<const SCEV *, 4> Ops; collectUniqueOps(S, Ops); - for (auto *Op : Ops) + for (const auto *Op : Ops) pushOp(Op); } } @@ -7117,7 +7124,7 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { while (!PoisonStack.empty() && !LatchControlDependentOnPoison) { const Instruction *Poison = PoisonStack.pop_back_val(); - for (auto *PoisonUser : Poison->users()) { + for (const auto *PoisonUser : Poison->users()) { if (propagatesPoison(cast<Operator>(PoisonUser))) { if (Pushed.insert(cast<Instruction>(PoisonUser)).second) PoisonStack.push_back(cast<Instruction>(PoisonUser)); @@ -7242,7 +7249,7 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { Operator *U = cast<Operator>(V); if (auto BO = MatchBinaryOp(U, 
DT)) { bool IsConstArg = isa<ConstantInt>(BO->RHS); - switch (U->getOpcode()) { + switch (BO->Opcode) { case Instruction::Add: { // For additions and multiplications, traverse add/mul chains for which we // can potentially create a single SCEV, to reduce the number of @@ -7284,7 +7291,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { } while (true); return nullptr; } - + case Instruction::Sub: + case Instruction::UDiv: + case Instruction::URem: + break; case Instruction::AShr: case Instruction::Shl: case Instruction::Xor: @@ -7296,7 +7306,10 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1)) return nullptr; break; + case Instruction::LShr: + return getUnknown(V); default: + llvm_unreachable("Unhandled binop"); break; } @@ -7340,12 +7353,34 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { // Keep constructing SCEVs' for phis recursively for now. return nullptr; - case Instruction::Select: + case Instruction::Select: { + // Check if U is a select that can be simplified to a SCEVUnknown. 
+ auto CanSimplifyToUnknown = [this, U]() { + if (U->getType()->isIntegerTy(1) || isa<ConstantInt>(U->getOperand(0))) + return false; + + auto *ICI = dyn_cast<ICmpInst>(U->getOperand(0)); + if (!ICI) + return false; + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + if (ICI->getPredicate() == CmpInst::ICMP_EQ || + ICI->getPredicate() == CmpInst::ICMP_NE) { + if (!(isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero())) + return true; + } else if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(U->getType())) + return true; + return false; + }; + if (CanSimplifyToUnknown()) + return getUnknown(U); + for (Value *Inc : U->operands()) Ops.push_back(Inc); return nullptr; break; - + } case Instruction::Call: case Instruction::Invoke: if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) { @@ -8338,7 +8373,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, // All exiting blocks we have gathered dominate loop's latch, so exact trip // count is simply a minimum out of all these calculated exit counts. 
SmallVector<const SCEV *, 2> Ops; - for (auto &ENT : ExitNotTaken) { + for (const auto &ENT : ExitNotTaken) { const SCEV *BECount = ENT.ExactNotTaken; assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && @@ -8348,7 +8383,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, Ops.push_back(BECount); if (Preds) - for (auto *P : ENT.Predicates) + for (const auto *P : ENT.Predicates) Preds->push_back(P); assert((Preds || ENT.hasAlwaysTruePredicate()) && @@ -8365,7 +8400,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (auto &ENT : ExitNotTaken) + for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.ExactNotTaken; @@ -8374,7 +8409,7 @@ ScalarEvolution::BackedgeTakenInfo::getExact(const BasicBlock *ExitingBlock, const SCEV *ScalarEvolution::BackedgeTakenInfo::getConstantMax( const BasicBlock *ExitingBlock, ScalarEvolution *SE) const { - for (auto &ENT : ExitNotTaken) + for (const auto &ENT : ExitNotTaken) if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate()) return ENT.MaxNotTaken; @@ -8433,8 +8468,8 @@ ScalarEvolution::ExitLimit::ExitLimit( assert((isa<SCEVCouldNotCompute>(MaxNotTaken) || isa<SCEVConstant>(MaxNotTaken)) && "No point in having a non-constant max backedge taken count!"); - for (auto *PredSet : PredSetList) - for (auto *P : *PredSet) + for (const auto *PredSet : PredSetList) + for (const auto *P : *PredSet) addPredicate(P); assert((isa<SCEVCouldNotCompute>(E) || !E->getType()->isPointerTy()) && "Backedge count should be int"); @@ -10522,8 +10557,8 @@ bool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, // Domination relationship must be a linear order on collected loops. 
#ifndef NDEBUG - for (auto *L1 : LoopsUsed) - for (auto *L2 : LoopsUsed) + for (const auto *L1 : LoopsUsed) + for (const auto *L2 : LoopsUsed) assert((DT.dominates(L1->getHeader(), L2->getHeader()) || DT.dominates(L2->getHeader(), L1->getHeader())) && "Domination relationship is not a linear order"); @@ -10977,8 +11012,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard - // (interprocedural conditions notwithstanding). - if (!L) return true; + // (interprocedural conditions notwithstanding). Do not bother about + // unreachable loops. + if (!L || !DT.isReachableFromEntry(L->getHeader())) + return true; if (VerifyIR) assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) && @@ -11035,12 +11072,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } - // If the loop is not reachable from the entry block, we risk running into an - // infinite loop as we walk up into the dom tree. These loops do not matter - // anyway, so we just return a conservative answer when we see them. - if (!DT.isReachableFromEntry(L->getHeader())) - return false; - if (isImpliedViaGuard(Latch, Pred, LHS, RHS)) return true; @@ -11086,6 +11117,9 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { + // Do not bother proving facts for unreachable code. 
+ if (!DT.isReachableFromEntry(BB)) + return true; if (VerifyIR) assert(!verifyFunction(*BB->getParent(), &dbgs()) && "This cannot be done on broken IR!"); @@ -11162,14 +11196,13 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB, if (ProveViaGuard(Pair.first)) return true; - const BranchInst *LoopEntryPredicate = + const BranchInst *BlockEntryPredicate = dyn_cast<BranchInst>(Pair.first->getTerminator()); - if (!LoopEntryPredicate || - LoopEntryPredicate->isUnconditional()) + if (!BlockEntryPredicate || BlockEntryPredicate->isUnconditional()) continue; - if (ProveViaCond(LoopEntryPredicate->getCondition(), - LoopEntryPredicate->getSuccessor(0) != Pair.second)) + if (ProveViaCond(BlockEntryPredicate->getCondition(), + BlockEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -13179,7 +13212,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, if (!isa<SCEVCouldNotCompute>(PBT)) { OS << "Predicated backedge-taken count is " << *PBT << "\n"; OS << " Predicates:\n"; - for (auto *P : Preds) + for (const auto *P : Preds) P->print(OS, 4); } else { OS << "Unpredictable predicated backedge-taken count. "; @@ -13256,7 +13289,7 @@ void ScalarEvolution::print(raw_ostream &OS) const { } bool First = true; - for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { + for (const auto *Iter = L; Iter; Iter = Iter->getParentLoop()) { if (First) { OS << "\t\t" "LoopDispositions: { "; First = false; @@ -13268,7 +13301,7 @@ void ScalarEvolution::print(raw_ostream &OS) const { OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter)); } - for (auto *InnerL : depth_first(L)) { + for (const auto *InnerL : depth_first(L)) { if (InnerL == L) continue; if (First) { @@ -13348,7 +13381,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { // This recurrence is variant w.r.t. L if any of its operands // are variant. 
- for (auto *Op : AR->operands()) + for (const auto *Op : AR->operands()) if (!isLoopInvariant(Op, L)) return LoopVariant; @@ -13363,7 +13396,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { case scSMinExpr: case scSequentialUMinExpr: { bool HasVarying = false; - for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { + for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) { LoopDisposition D = getLoopDisposition(Op, L); if (D == LoopVariant) return LoopVariant; @@ -13529,12 +13562,12 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) { const SCEV *Curr = Worklist.pop_back_val(); auto Users = SCEVUsers.find(Curr); if (Users != SCEVUsers.end()) - for (auto *User : Users->second) + for (const auto *User : Users->second) if (ToForget.insert(User).second) Worklist.push_back(User); } - for (auto *S : ToForget) + for (const auto *S : ToForget) forgetMemoizedResultsImpl(S); for (auto I = PredicatedSCEVRewrites.begin(); @@ -13747,7 +13780,7 @@ void ScalarEvolution::verify() const { if (ValidLoops.insert(L).second) Worklist.append(L->begin(), L->end()); } - for (auto &KV : ValueExprMap) { + for (const auto &KV : ValueExprMap) { #ifndef NDEBUG // Check for SCEV expressions referencing invalid/deleted loops. if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) { @@ -14018,7 +14051,7 @@ public: const SCEV *visitUnknown(const SCEVUnknown *Expr) { if (Pred) { if (auto *U = dyn_cast<SCEVUnionPredicate>(Pred)) { - for (auto *Pred : U->getPredicates()) + for (const auto *Pred : U->getPredicates()) if (const auto *IPred = dyn_cast<SCEVComparePredicate>(Pred)) if (IPred->getLHS() == Expr && IPred->getPredicate() == ICmpInst::ICMP_EQ) @@ -14098,7 +14131,7 @@ private: PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); if (!PredicatedRewrite) return Expr; - for (auto *P : PredicatedRewrite->second){ + for (const auto *P : PredicatedRewrite->second){ // Wrap predicates from outer loops are not supported. 
if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) { if (L != WP->getExpr()->getLoop()) @@ -14135,7 +14168,7 @@ const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( // Since the transformation was successful, we can now transfer the SCEV // predicates. - for (auto *P : TransformPreds) + for (const auto *P : TransformPreds) Preds.insert(P); return AddRec; @@ -14234,7 +14267,7 @@ SCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, /// Union predicates don't get cached so create a dummy set ID for it. SCEVUnionPredicate::SCEVUnionPredicate(ArrayRef<const SCEVPredicate *> Preds) : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) { - for (auto *P : Preds) + for (const auto *P : Preds) add(P); } @@ -14253,13 +14286,13 @@ bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { - for (auto Pred : Preds) + for (const auto *Pred : Preds) Pred->print(OS, Depth); } void SCEVUnionPredicate::add(const SCEVPredicate *N) { if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) { - for (auto Pred : Set->Preds) + for (const auto *Pred : Set->Preds) add(Pred); return; } @@ -14276,7 +14309,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, void ScalarEvolution::registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops) { - for (auto *Op : Ops) + for (const auto *Op : Ops) // We do not expect that forgetting cached data for SCEVConstants will ever // open any prospects for sharpening or introduce any correctness issues, // so we don't bother storing their dependencies. 
@@ -14307,7 +14340,7 @@ const SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { if (!BackedgeCount) { SmallVector<const SCEVPredicate *, 4> Preds; BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, Preds); - for (auto *P : Preds) + for (const auto *P : Preds) addPredicate(*P); } return BackedgeCount; @@ -14378,7 +14411,7 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { if (!New) return nullptr; - for (auto *P : NewPreds) + for (const auto *P : NewPreds) addPredicate(*P); RewriteMap[SE.getSCEV(V)] = {Generation, New}; diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp index 52e8566aca3c..162fd75c73e0 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/StackLifetime.cpp @@ -182,7 +182,7 @@ void StackLifetime::calculateLocalLiveness() { // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - for (auto *PredBB : predecessors(BB)) { + for (const auto *PredBB : predecessors(BB)) { LivenessMap::const_iterator I = BlockLiveness.find(PredBB); // If a predecessor is unreachable, ignore it. 
if (I == BlockLiveness.end()) diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 94b646ab7c06..9698af3ca85c 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -206,7 +206,7 @@ template <typename CalleeTy> struct FunctionInfo { O << " allocas uses:\n"; if (F) { - for (auto &I : instructions(F)) { + for (const auto &I : instructions(F)) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { auto &AS = Allocas.find(AI)->second; O << " " << AI->getName() << "[" @@ -763,7 +763,7 @@ const ConstantRange *findParamAccess(const FunctionSummary &FS, uint32_t ParamNo) { assert(FS.isLive()); assert(FS.isDSOLocal()); - for (auto &PS : FS.paramAccesses()) + for (const auto &PS : FS.paramAccesses()) if (ParamNo == PS.ParamNo) return &PS.Use; return nullptr; @@ -823,7 +823,7 @@ GVToSSI createGlobalStackSafetyInfo( Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits(); StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy)); - for (auto &F : SSDFA.run()) { + for (const auto &F : SSDFA.run()) { auto FI = F.second; auto &SrcF = Functions[F.first]; for (auto &KV : FI.Allocas) { @@ -922,7 +922,7 @@ StackSafetyInfo::getParamAccesses(ModuleSummaryIndex &Index) const { FunctionSummary::ParamAccess &Param = ParamAccesses.back(); Param.Calls.reserve(PS.Calls.size()); - for (auto &C : PS.Calls) { + for (const auto &C : PS.Calls) { // Parameter forwarded into another function by any or unknown offset // will make ParamAccess::Range as FullSet anyway. So we can drop the // entire parameter like we did above. 
@@ -978,7 +978,7 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const { if (SSI.empty()) return; const Module &M = *SSI.begin()->first->getParent(); - for (auto &F : M.functions()) { + for (const auto &F : M.functions()) { if (!F.isDeclaration()) { SSI.find(&F)->second.print(O, F.getName(), &F); O << " safe accesses:" @@ -1094,7 +1094,7 @@ bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) { bool llvm::needsParamAccessSummary(const Module &M) { if (StackSafetyRun) return true; - for (auto &F : M.functions()) + for (const auto &F : M.functions()) if (F.hasFnAttribute(Attribute::SanitizeMemTag)) return true; return false; @@ -1126,13 +1126,13 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { continue; if (FS->isLive() && FS->isDSOLocal()) { FunctionInfo<FunctionSummary> FI; - for (auto &PS : FS->paramAccesses()) { + for (const auto &PS : FS->paramAccesses()) { auto &US = FI.Params .emplace(PS.ParamNo, FunctionSummary::ParamAccess::RangeWidth) .first->second; US.Range = PS.Use; - for (auto &Call : PS.Calls) { + for (const auto &Call : PS.Calls) { assert(!Call.Offsets.isFullSet()); FunctionSummary *S = findCalleeFunctionSummary(Call.Callee, FS->modulePath()); @@ -1158,10 +1158,10 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { NumCombinedDataFlowNodes += Functions.size(); StackSafetyDataFlowAnalysis<FunctionSummary> SSDFA( FunctionSummary::ParamAccess::RangeWidth, std::move(Functions)); - for (auto &KV : SSDFA.run()) { + for (const auto &KV : SSDFA.run()) { std::vector<FunctionSummary::ParamAccess> NewParams; NewParams.reserve(KV.second.Params.size()); - for (auto &Param : KV.second.Params) { + for (const auto &Param : KV.second.Params) { // It's not needed as FullSet is processed the same as a missing value. 
if (Param.second.Range.isFullSet()) continue; diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp index 6e34a8303c08..cfa6e3a97626 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -58,14 +58,16 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) { } IntrinsicCostAttributes::IntrinsicCostAttributes( - Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost) + Intrinsic::ID Id, const CallBase &CI, InstructionCost ScalarizationCost, + bool TypeBasedOnly) : II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id), ScalarizationCost(ScalarizationCost) { if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI)) FMF = FPMO->getFastMathFlags(); - Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); + if (!TypeBasedOnly) + Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end()); FunctionType *FTy = CI.getCalledFunction()->getFunctionType(); ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } @@ -294,8 +296,8 @@ bool TargetTransformInfo::isHardwareLoopProfitable( bool TargetTransformInfo::preferPredicateOverEpilogue( Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI) const { - return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); + LoopVectorizationLegality *LVL) const { + return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL); } PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const { diff --git a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 560f46d39d0d..216027778fab 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ 
b/contrib/llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -303,7 +303,7 @@ public: /// given offset. Update the offset to be relative to the field type. TBAAStructTypeNode getField(uint64_t &Offset) const { bool NewFormat = isNewFormat(); - const ArrayRef<MDOperand> Operands(Node->op_begin(), Node->op_end()); + const ArrayRef<MDOperand> Operands = Node->operands(); const unsigned NumOperands = Operands.size(); if (NewFormat) { @@ -811,7 +811,8 @@ MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) { return nullptr; // Otherwise, create TBAA with the new Len - SmallVector<Metadata *, 4> NextNodes(MD->operands()); + ArrayRef<MDOperand> MDOperands = MD->operands(); + SmallVector<Metadata *, 4> NextNodes(MDOperands.begin(), MDOperands.end()); ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]); // Don't create a new MDNode if it is the same length. diff --git a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp index add2d427e05b..1f3798d1338e 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ValueTracking.cpp @@ -2297,7 +2297,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, return false; unsigned NumUsesExplored = 0; - for (auto *U : V->users()) { + for (const auto *U : V->users()) { // Avoid massive lists if (NumUsesExplored >= DomConditionsMaxUses) break; @@ -2338,7 +2338,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, SmallVector<const User *, 4> WorkList; SmallPtrSet<const User *, 4> Visited; - for (auto *CmpU : U->users()) { + for (const auto *CmpU : U->users()) { assert(WorkList.empty() && "Should be!"); if (Visited.insert(CmpU).second) WorkList.push_back(CmpU); @@ -2352,7 +2352,7 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V, // TODO: Support similar logic of OR and EQ predicate? 
if (NonNullIfTrue) if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) { - for (auto *CurrU : Curr->users()) + for (const auto *CurrU : Curr->users()) if (Visited.insert(CurrU).second) WorkList.push_back(CurrU); continue; @@ -5073,7 +5073,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, if (DT.dominates(NoWrapEdge, Result->getParent())) continue; - for (auto &RU : Result->uses()) + for (const auto &RU : Result->uses()) if (!DT.dominates(NoWrapEdge, RU)) return false; } @@ -5645,7 +5645,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, // whether a value is directly passed to an instruction that must take // well-defined operands. - for (auto &I : make_range(Begin, End)) { + for (const auto &I : make_range(Begin, End)) { if (isa<DbgInfoIntrinsic>(I)) continue; if (--ScanLimit == 0) @@ -5676,7 +5676,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, Visited.insert(BB); while (true) { - for (auto &I : make_range(Begin, End)) { + for (const auto &I : make_range(Begin, End)) { if (isa<DbgInfoIntrinsic>(I)) continue; if (--ScanLimit == 0) diff --git a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp index 894680cda1fc..c4795a80ead2 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/VectorUtils.cpp @@ -762,7 +762,7 @@ static void addToAccessGroupList(ListT &List, MDNode *AccGroups) { return; } - for (auto &AccGroupListOp : AccGroups->operands()) { + for (const auto &AccGroupListOp : AccGroups->operands()) { auto *Item = cast<MDNode>(AccGroupListOp.get()); assert(isValidAsAccessGroup(Item) && "List item must be an access group"); List.insert(Item); @@ -1497,7 +1497,7 @@ void VFABI::getVectorVariantNames( SmallVector<StringRef, 8> ListAttr; S.split(ListAttr, ","); - for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) { + for (const auto &S : SetVector<StringRef>(ListAttr.begin(), 
ListAttr.end())) { #ifndef NDEBUG LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule())); diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp index 1613e7e42a0a..c5ab35d94860 100644 --- a/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -260,6 +260,9 @@ bool MetadataVerifier::verifyKernel(msgpack::DocNode &Node) { return false; if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true)) return false; + if (!verifyScalarEntry(KernelMap, ".uses_dynamic_stack", false, + msgpack::Type::Boolean)) + return false; if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true)) return false; if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true)) diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 8d5a2555f9af..1d6c21bd66d1 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5510,6 +5510,61 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (!OperandBundles.empty()) UpgradeOperandBundles(OperandBundles); + if (auto *IA = dyn_cast<InlineAsm>(Callee)) { + InlineAsm::ConstraintInfoVector ConstraintInfo = IA->ParseConstraints(); + auto IsLabelConstraint = [](const InlineAsm::ConstraintInfo &CI) { + return CI.Type == InlineAsm::isLabel; + }; + if (none_of(ConstraintInfo, IsLabelConstraint)) { + // Upgrade explicit blockaddress arguments to label constraints. + // Verify that the last arguments are blockaddress arguments that + // match the indirect destinations. Clang always generates callbr + // in this form. We could support reordering with more effort. 
+ unsigned FirstBlockArg = Args.size() - IndirectDests.size(); + for (unsigned ArgNo = FirstBlockArg; ArgNo < Args.size(); ++ArgNo) { + unsigned LabelNo = ArgNo - FirstBlockArg; + auto *BA = dyn_cast<BlockAddress>(Args[ArgNo]); + if (!BA || BA->getFunction() != F || + LabelNo > IndirectDests.size() || + BA->getBasicBlock() != IndirectDests[LabelNo]) + return error("callbr argument does not match indirect dest"); + } + + // Remove blockaddress arguments. + Args.erase(Args.begin() + FirstBlockArg, Args.end()); + ArgTyIDs.erase(ArgTyIDs.begin() + FirstBlockArg, ArgTyIDs.end()); + + // Recreate the function type with less arguments. + SmallVector<Type *> ArgTys; + for (Value *Arg : Args) + ArgTys.push_back(Arg->getType()); + FTy = + FunctionType::get(FTy->getReturnType(), ArgTys, FTy->isVarArg()); + + // Update constraint string to use label constraints. + std::string Constraints = IA->getConstraintString(); + unsigned ArgNo = 0; + size_t Pos = 0; + for (const auto &CI : ConstraintInfo) { + if (CI.hasArg()) { + if (ArgNo >= FirstBlockArg) + Constraints.insert(Pos, "!"); + ++ArgNo; + } + + // Go to next constraint in string. 
+ Pos = Constraints.find(',', Pos); + if (Pos == std::string::npos) + break; + ++Pos; + } + + Callee = InlineAsm::get(FTy, IA->getAsmString(), Constraints, + IA->hasSideEffects(), IA->isAlignStack(), + IA->getDialect(), IA->canThrow()); + } + } + I = CallBrInst::Create(FTy, Callee, DefaultDest, IndirectDests, Args, OperandBundles); ResTypeID = getContainedTypeID(FTyID); diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 0d57ae4ef9df..13d53a35084d 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1226,10 +1226,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } - MetadataList.assignValue( - LocalAsMetadata::get(ValueList.getValueFwdRef( - Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), - NextMetadataNo); + Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from old fn metadata"); + + MetadataList.assignValue(LocalAsMetadata::get(V), NextMetadataNo); NextMetadataNo++; break; } @@ -1248,8 +1250,11 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Ty->isMetadataTy()) Elts.push_back(getMD(Record[i + 1])); else if (!Ty->isVoidTy()) { - auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef( - Record[i + 1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)); + Value *V = ValueList.getValueFwdRef(Record[i + 1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from old metadata"); + auto *MD = ValueAsMetadata::get(V); assert(isa<ConstantAsMetadata>(MD) && "Expected non-function-local metadata"); Elts.push_back(MD); @@ -1269,10 +1274,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); - MetadataList.assignValue( - 
ValueAsMetadata::get(ValueList.getValueFwdRef( - Record[1], Ty, TyID, /*ConstExprInsertBB*/ nullptr)), - NextMetadataNo); + Value *V = ValueList.getValueFwdRef(Record[1], Ty, TyID, + /*ConstExprInsertBB*/ nullptr); + if (!V) + return error("Invalid value reference from metadata"); + + MetadataList.assignValue(ValueAsMetadata::get(V), NextMetadataNo); NextMetadataNo++; break; } diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.h b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.h index 995d46f01f75..a5b3f6e20707 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.h +++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/ValueList.h @@ -21,7 +21,6 @@ namespace llvm { -class Constant; class Error; class Type; class Value; diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 590562ce2796..d7e012fb6a9e 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4104,8 +4104,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { for (const GlobalAlias &A : M.aliases()) { auto *Aliasee = A.getAliaseeObject(); - if (!Aliasee->hasName()) - // Nameless function don't have an entry in the summary, skip it. + // Skip ifunc and nameless functions which don't have an entry in the + // summary. 
+ if (!Aliasee->hasName() || isa<GlobalIFunc>(Aliasee)) continue; auto AliasId = VE.getValueID(&A); auto AliaseeId = VE.getValueID(Aliasee); diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 727ec2e02cc2..998f629aaa4e 100644 --- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -802,7 +802,7 @@ void ValueEnumerator::organizeMetadata() { // - by function, then // - by isa<MDString> // and then sort by the original/current ID. Since the IDs are guaranteed to - // be unique, the result of std::sort will be deterministic. There's no need + // be unique, the result of llvm::sort will be deterministic. There's no need // for std::stable_sort. llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) { return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index b10d79f4b5a6..9526bf7610b4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -245,8 +245,8 @@ public: void AccelTableWriter::emitHashes() const { uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); unsigned BucketIdx = 0; - for (auto &Bucket : Contents.getBuckets()) { - for (auto &Hash : Bucket) { + for (const auto &Bucket : Contents.getBuckets()) { + for (const auto &Hash : Bucket) { uint32_t HashValue = Hash->HashValue; if (SkipIdenticalHashes && PrevHash == HashValue) continue; @@ -327,7 +327,7 @@ void AppleAccelTableWriter::emitData() const { const auto &Buckets = Contents.getBuckets(); for (const AccelTableBase::HashList &Bucket : Buckets) { uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); - for (auto &Hash : Bucket) { + for (const auto &Hash : Bucket) { 
// Terminate the previous entry if there is no hash collision with the // current one. if (PrevHash != std::numeric_limits<uint64_t>::max() && @@ -667,12 +667,12 @@ void AccelTableBase::print(raw_ostream &OS) const { } OS << "Buckets and Hashes: \n"; - for (auto &Bucket : Buckets) - for (auto &Hash : Bucket) + for (const auto &Bucket : Buckets) + for (const auto &Hash : Bucket) Hash->print(OS); OS << "Data: \n"; - for (auto &E : Entries) + for (const auto &E : Entries) E.second.print(OS); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 94612a51d2e1..e0050a47a6f6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -488,7 +488,7 @@ bool AsmPrinter::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); - for (auto &I : *MI) + for (const auto &I : *MI) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) MP->beginAssembly(M, *MI, *this); @@ -1731,7 +1731,7 @@ static unsigned getNumGlobalVariableUses(const Constant *C) { return 1; unsigned NumUses = 0; - for (auto *CU : C->users()) + for (const auto *CU : C->users()) NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); return NumUses; @@ -1754,7 +1754,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // To be a got equivalent, at least one of its users need to be a constant // expression used by another global variable. 
- for (auto *U : GV->users()) + for (const auto *U : GV->users()) NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); return NumGOTEquivUsers > 0; @@ -1797,7 +1797,7 @@ void AsmPrinter::emitGlobalGOTEquivs() { } GlobalGOTEquivs.clear(); - for (auto *GV : FailedCandidates) + for (const auto *GV : FailedCandidates) emitGlobalVariable(GV); } @@ -2731,6 +2731,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // to represent relocations on supported targets. Expressions involving only // constant addresses are constant folded instead. switch (CE->getOpcode()) { + default: + break; // Error case Instruction::AddrSpaceCast: { const Constant *Op = CE->getOperand(0); unsigned DstAS = CE->getType()->getPointerAddressSpace(); @@ -2738,24 +2740,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { if (TM.isNoopAddrSpaceCast(SrcAS, DstAS)) return lowerConstant(Op); - // Fallthrough to error. - LLVM_FALLTHROUGH; - } - default: { - // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using DataLayout as a - // last resort before giving up. - Constant *C = ConstantFoldConstant(CE, getDataLayout()); - if (C != CE) - return lowerConstant(C); - - // Otherwise report the problem to the user. - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction().getParent()); - report_fatal_error(Twine(OS.str())); + break; // Error } case Instruction::GetElementPtr: { // Generate a symbolic expression for the byte address @@ -2860,6 +2845,21 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } + + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using DataLayout as a + // last resort before giving up. 
+ Constant *C = ConstantFoldConstant(CE, getDataLayout()); + if (C != CE) + return lowerConstant(C); + + // Otherwise report the problem to the user. + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? nullptr : MF->getFunction().getParent()); + report_fatal_error(Twine(OS.str())); } static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, @@ -3359,9 +3359,12 @@ void AsmPrinter::emitGlobalConstant(const DataLayout &DL, const Constant *CV, } if (!AliasList) return; - for (const auto &AliasPair : *AliasList) - report_fatal_error("Aliases with offset " + Twine(AliasPair.first) + - " were not emitted."); + // TODO: These remaining aliases are not emitted in the correct location. Need + // to handle the case where the alias offset doesn't refer to any sub-element. + for (auto &AliasPair : *AliasList) { + for (const GlobalAlias *GA : AliasPair.second) + OutStreamer->emitLabel(getSymbol(GA)); + } } void AsmPrinter::emitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -3717,7 +3720,7 @@ void AsmPrinter::emitStackMaps(StackMaps &SM) { // No GC strategy, use the default format. NeedsDefault = true; else - for (auto &I : *MI) { + for (const auto &I : *MI) { if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) if (MP->emitStackMaps(SM, *this)) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 719fec06aa33..bfa53f5b9374 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -309,7 +309,7 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { // Emit the DIE children if any. 
if (Die.hasChildren()) { - for (auto &Child : Die.children()) + for (const auto &Child : Die.children()) emitDwarfDIE(Child); OutStreamer->AddComment("End Of Children Mark"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 5da50d7aab9f..1d546e5fd72e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -374,7 +374,7 @@ void DIEHash::computeHash(const DIE &Die) { addAttributes(Die); // Then hash each of the children of the DIE. - for (auto &C : Die.children()) { + for (const auto &C : Die.children()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... if (isType(C.getTag()) || (C.getTag() == dwarf::DW_TAG_subprogram && isType(C.getParent()->getTag()))) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 1358f4d25990..dabbfb45f687 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -340,11 +340,11 @@ static void clobberRegEntries(InlinedEntity Var, unsigned RegNo, if (Entry.getInstr()->hasDebugOperandForReg(RegNo)) { IndicesToErase.push_back(Index); Entry.endEntry(ClobberIndex); - for (auto &MO : Entry.getInstr()->debug_operands()) + for (const auto &MO : Entry.getInstr()->debug_operands()) if (MO.isReg() && MO.getReg() && MO.getReg() != RegNo) MaybeRemovedRegisters.insert(MO.getReg()); } else { - for (auto &MO : Entry.getInstr()->debug_operands()) + for (const auto &MO : Entry.getInstr()->debug_operands()) if (MO.isReg() && MO.getReg()) KeepRegisters.insert(MO.getReg()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp 
b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 660a064687d3..8ebbed974abb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -304,7 +304,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { // Mark all non-overlapping initial fragments. - for (auto I = Entries.begin(); I != Entries.end(); ++I) { + for (const auto *I = Entries.begin(); I != Entries.end(); ++I) { if (!I->isDbgValue()) continue; const DIExpression *Fragment = I->getInstr()->getDebugExpression(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index b3f99d346faa..b26960cdebb8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -848,7 +848,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, Optional<unsigned> NVPTXAddressSpace; DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - for (auto &Fragment : DV.getFrameIndexExprs()) { + for (const auto &Fragment : DV.getFrameIndexExprs()) { Register FrameReg; const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); @@ -970,7 +970,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { SmallDenseSet<DbgVariable *, 8> Visiting; // Initialize the worklist and the DIVariable lookup table. 
- for (auto Var : reverse(Input)) { + for (auto *Var : reverse(Input)) { DbgVar.insert({Var->getVariable(), Var}); WorkList.push_back({Var, 0}); } @@ -1005,7 +1005,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { // Push dependencies and this node onto the worklist, so that this node is // visited again after all of its dependencies are handled. WorkList.push_back({Var, 1}); - for (auto *Dependency : dependencies(Var)) { + for (const auto *Dependency : dependencies(Var)) { // Don't add dependency if it is in a different lexical scope or a global. if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency)) if (DbgVariable *Var = DbgVar.lookup(Dep)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 866338a949f3..54af14429907 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -819,7 +819,7 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, } // Do not emit CSInfo for undef forwarding registers. - for (auto &MO : CallMI->uses()) + for (const auto &MO : CallMI->uses()) if (MO.isReg() && MO.isUndef()) ForwardedRegWorklist.erase(MO.getReg()); @@ -2235,7 +2235,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { #endif // Construct abstract scopes. 
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - auto *SP = cast<DISubprogram>(AScope->getScopeNode()); + const auto *SP = cast<DISubprogram>(AScope->getScopeNode()); for (const DINode *DN : SP->getRetainedNodes()) { if (!Processed.insert(InlinedEntity(DN, nullptr)).second) continue; @@ -2527,7 +2527,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, using Encoding = DWARFExpression::Operation::Encoding; uint64_t Offset = 0; - for (auto &Op : Expr) { + for (const auto &Op : Expr) { assert(Op.getCode() != dwarf::DW_OP_const_type && "3 operand ops not yet supported"); Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : ""); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index ad9dc517539a..f21c1bf4e914 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -187,7 +187,7 @@ bool AtomicExpand::runOnFunction(Function &F) { AtomicInsts.push_back(&I); bool MadeChange = false; - for (auto I : AtomicInsts) { + for (auto *I : AtomicInsts) { auto LI = dyn_cast<LoadInst>(I); auto SI = dyn_cast<StoreInst>(I); auto RMWI = dyn_cast<AtomicRMWInst>(I); @@ -1371,7 +1371,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. SmallVector<ExtractValueInst *, 2> PrunedInsts; - for (auto User : CI->users()) { + for (auto *User : CI->users()) { ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); if (!EV) continue; @@ -1388,7 +1388,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { } // We can remove the instructions now we're no longer iterating through them. 
- for (auto EV : PrunedInsts) + for (auto *EV : PrunedInsts) EV->eraseFromParent(); if (!CI->use_empty()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp index f05f5b9f9947..958212a0e448 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -268,8 +268,8 @@ void llvm::sortBasicBlocksAndUpdateBranches( // If the exception section begins with a landing pad, that landing pad will // assume a zero offset (relative to @LPStart) in the LSDA. However, a value of // zero implies "no landing pad." This function inserts a NOP just before the EH -// pad label to ensure a nonzero offset. Returns true if padding is not needed. -static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { +// pad label to ensure a nonzero offset. +void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) { for (auto &MBB : MF) { if (MBB.isBeginSection() && MBB.isEHPad()) { MachineBasicBlock::iterator MI = MBB.begin(); @@ -278,10 +278,8 @@ static bool avoidZeroOffsetLandingPad(MachineFunction &MF) { MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop(); BuildMI(MBB, MI, DebugLoc(), MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode())); - return false; } } - return true; } // This checks if the source of this function has drifted since this binary was @@ -297,7 +295,7 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) { auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation); if (Existing) { MDTuple *Tuple = cast<MDTuple>(Existing); - for (auto &N : Tuple->operands()) + for (const auto &N : Tuple->operands()) if (cast<MDString>(N.get())->getString() == MetadataName) return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index 689e49978d43..519b24c21d7a 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -121,7 +121,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI, assert(MI && "Dead valno in interval"); } - if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) + if (!TII.isTriviallyReMaterializable(*MI)) return false; } return true; @@ -279,7 +279,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, MRI.clearSimpleHint(LI.reg()); std::set<Register> HintedRegs; - for (auto &Hint : CopyHints) { + for (const auto &Hint : CopyHints) { if (!HintedRegs.insert(Hint.Reg).second || (TargetHint.first != 0 && Hint.Reg == TargetHint.second)) // Don't add the same reg twice or the target-type hint again. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index 6778af22f532..b6c762b93ca5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -730,7 +730,7 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { // (Repeatedly) merging blocks into their predecessors can create redundant // debug intrinsics. - for (auto &Pred : Preds) + for (const auto &Pred : Preds) if (auto *BB = cast_or_null<BasicBlock>(Pred)) RemoveRedundantDbgInstrs(BB); @@ -3684,7 +3684,7 @@ private: // Phi we added (subject to match) and both of them is in the same basic // block then we can match our pair if values match. So we state that // these values match and add it to work list to verify that. 
- for (auto B : Item.first->blocks()) { + for (auto *B : Item.first->blocks()) { Value *FirstValue = Item.first->getIncomingValueForBlock(B); Value *SecondValue = Item.second->getIncomingValueForBlock(B); if (FirstValue == SecondValue) @@ -5227,18 +5227,31 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with the + // same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; // First, find the pointer. 
@@ -5361,8 +5374,21 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.InBounds); } - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with + // the same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } } else { // We'd require a ptrtoint/inttoptr down the line, which we can't do for @@ -7793,9 +7819,11 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, } // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not // alive on IndirectBr edges). 
- assert(find_if(GEPIOp->users(), [&](User *Usr) { - return cast<Instruction>(Usr)->getParent() != SrcBlock; - }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock"); + assert(llvm::none_of(GEPIOp->users(), + [&](User *Usr) { + return cast<Instruction>(Usr)->getParent() != SrcBlock; + }) && + "GEPIOp is used outside SrcBlock"); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp index 42192f41dbda..34fb1d286a58 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -239,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, }); if (ResourceAvail && shouldAddToPacket(MI)) { // Dependency check for MI with instructions in CurrentPacketMIs. - for (auto MJ : CurrentPacketMIs) { + for (auto *MJ : CurrentPacketMIs) { SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp index 32858d043383..c108f0088d43 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -576,7 +576,7 @@ static bool hasSameValue(const MachineRegisterInfo &MRI, // If the instruction could modify memory, or there may be some intervening // store between the two, we can't consider them to be equal. - if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr)) + if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) return false; // We also can't guarantee that they are the same if, for example, the @@ -808,7 +808,7 @@ void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, // TBB and FBB should not dominate any blocks. // Tail children should be transferred to Head. 
MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (auto B : Removed) { + for (auto *B : Removed) { MachineDomTreeNode *Node = DomTree->getNode(B); assert(Node != HeadNode && "Cannot erase the head node"); while (Node->getNumChildren()) { @@ -826,7 +826,7 @@ void updateLoops(MachineLoopInfo *Loops, return; // If-conversion doesn't change loop structure, and it doesn't mess with back // edges, so updating LoopInfo is simply removing the dead blocks. - for (auto B : Removed) + for (auto *B : Removed) Loops->removeBlock(B); } } // namespace @@ -1065,7 +1065,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (auto DomNode : post_order(DomTree)) + for (auto *DomNode : post_order(DomTree)) if (tryConvertIf(DomNode->getBlock())) Changed = true; @@ -1198,7 +1198,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. 
- for (auto DomNode : post_order(DomTree)) + for (auto *DomNode : post_order(DomTree)) if (tryConvertIf(DomNode->getBlock())) Changed = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 59932a542bbc..db4d42bf3ca4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -82,8 +83,11 @@ STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); /// \returns Whether the vector mask \p MaskVal has all lane bits set. static bool isAllTrueMask(Value *MaskVal) { - auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); - return ConstVec && ConstVec->isAllOnesValue(); + if (Value *SplattedVal = getSplatValue(MaskVal)) + if (auto *ConstValue = dyn_cast<Constant>(SplattedVal)) + return ConstValue->isAllOnesValue(); + + return false; } /// \returns A non-excepting divisor constant for this type. @@ -171,6 +175,10 @@ struct CachingVPExpander { Value *expandPredicationInReduction(IRBuilder<> &Builder, VPReductionIntrinsic &PI); + /// \brief Lower this VP memory operation to a non-VP intrinsic. + Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, + VPIntrinsic &VPI); + /// \brief Query TTI and expand the vector predication in \p P accordingly. 
Value *expandPredication(VPIntrinsic &PI); @@ -389,6 +397,71 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, return Reduction; } +Value * +CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, + VPIntrinsic &VPI) { + assert(VPI.canIgnoreVectorLengthParam()); + + const auto &DL = F.getParent()->getDataLayout(); + + Value *MaskParam = VPI.getMaskParam(); + Value *PtrParam = VPI.getMemoryPointerParam(); + Value *DataParam = VPI.getMemoryDataParam(); + bool IsUnmasked = isAllTrueMask(MaskParam); + + MaybeAlign AlignOpt = VPI.getPointerAlignment(); + + Value *NewMemoryInst = nullptr; + switch (VPI.getIntrinsicID()) { + default: + llvm_unreachable("Not a VP memory intrinsic"); + case Intrinsic::vp_store: + if (IsUnmasked) { + StoreInst *NewStore = + Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false); + if (AlignOpt.has_value()) + NewStore->setAlignment(AlignOpt.value()); + NewMemoryInst = NewStore; + } else + NewMemoryInst = Builder.CreateMaskedStore( + DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam); + + break; + case Intrinsic::vp_load: + if (IsUnmasked) { + LoadInst *NewLoad = + Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false); + if (AlignOpt.has_value()) + NewLoad->setAlignment(AlignOpt.value()); + NewMemoryInst = NewLoad; + } else + NewMemoryInst = Builder.CreateMaskedLoad( + VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam); + + break; + case Intrinsic::vp_scatter: { + auto *ElementType = + cast<VectorType>(DataParam->getType())->getElementType(); + NewMemoryInst = Builder.CreateMaskedScatter( + DataParam, PtrParam, + AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam); + break; + } + case Intrinsic::vp_gather: { + auto *ElementType = cast<VectorType>(VPI.getType())->getElementType(); + NewMemoryInst = Builder.CreateMaskedGather( + VPI.getType(), PtrParam, + AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam, nullptr, + VPI.getName()); + break; + } + 
} + + assert(NewMemoryInst); + replaceOperation(*NewMemoryInst, VPI); + return NewMemoryInst; +} + void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); @@ -465,6 +538,16 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) return expandPredicationInReduction(Builder, *VPRI); + switch (VPI.getIntrinsicID()) { + default: + break; + case Intrinsic::vp_load: + case Intrinsic::vp_store: + case Intrinsic::vp_gather: + case Intrinsic::vp_scatter: + return expandPredicationInMemoryIntrinsic(Builder, VPI); + } + return &VPI; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp index 3ec666227651..3f8fe2402d65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp @@ -85,7 +85,7 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, OS.emitInt32(0); // Reserved - for (auto &Fault : FFI) { + for (const auto &Fault : FFI) { LLVM_DEBUG(dbgs() << WFMP << " fault type: " << faultTypeToString(Fault.Kind) << "\n"); OS.emitInt32(Fault.Kind); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index ac140e745600..6a0d1c33d3e3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -319,7 +319,7 @@ const GISelInstProfileBuilder & GISelInstProfileBuilder::addNodeID(const MachineInstr *MI) const { addNodeIDMBB(MI->getParent()); addNodeIDOpcode(MI->getOpcode()); - for (auto &Op : MI->operands()) + for (const auto &Op : MI->operands()) addNodeIDMachineOperand(Op); addNodeIDFlag(MI->getFlags()); return *this; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp 
b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index b06043fb4c31..6c36c6445c65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -116,7 +116,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, // we'll pass to the assigner function. unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); - for (auto &Arg : CB.args()) { + for (const auto &Arg : CB.args()) { ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); @@ -960,7 +960,7 @@ bool CallLowering::parametersInCSRMatch( const SmallVectorImpl<CCValAssign> &OutLocs, const SmallVectorImpl<ArgInfo> &OutArgs) const { for (unsigned i = 0; i < OutLocs.size(); ++i) { - auto &ArgLoc = OutLocs[i]; + const auto &ArgLoc = OutLocs[i]; // If it's not a register, it's fine. if (!ArgLoc.isRegLoc()) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ad0c0c8315dc..da054b9c14fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2385,7 +2385,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, // loading from. To be safe, let's just assume that all loads and stores // are different (unless we have something which is guaranteed to not // change.) 
- if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr)) + if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad()) return false; // If both instructions are loads or stores, they are equal only if both @@ -2396,7 +2396,7 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1, if (!LS1 || !LS2) return false; - if (!I2->isDereferenceableInvariantLoad(nullptr) || + if (!I2->isDereferenceableInvariantLoad() || (LS1->getMemSizeInBits() != LS2->getMemSizeInBits())) return false; } @@ -4800,24 +4800,22 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { auto BuildUDIVPattern = [&](const Constant *C) { auto *CI = cast<ConstantInt>(C); const APInt &Divisor = CI->getValue(); - UnsignedDivisonByConstantInfo magics = - UnsignedDivisonByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.IsAdd != 0 && !Divisor[0]) { + if (magics.IsAdd && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. 
magics = - UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(!magics.IsAdd && "Should use cheap fixup now"); } - APInt Magic = magics.Magic; - unsigned SelNPQ; - if (magics.IsAdd == 0 || Divisor.isOneValue()) { + if (!magics.IsAdd || Divisor.isOneValue()) { assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); PostShift = magics.ShiftAmount; @@ -4829,7 +4827,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) { PreShifts.push_back( MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0)); - MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0)); + MagicFactors.push_back(MIB.buildConstant(ScalarTy, magics.Magic).getReg(0)); NPQFactors.push_back( MIB.buildConstant(ScalarTy, SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 947facc87b71..dbdcfe0b6f0b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" @@ -166,8 +167,10 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<AAResultsWrapperPass>(); + } 
AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addPreserved<TargetLibraryInfoWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -684,7 +687,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { BranchProbabilityInfo *BPI = FuncInfo.BPI; CaseClusterVector Clusters; Clusters.reserve(SI.getNumCases()); - for (auto &I : SI.cases()) { + for (const auto &I : SI.cases()) { MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor()); assert(Succ && "Could not find successor mbb in mapping"); const ConstantInt *CaseVal = I.getCaseValue(); @@ -1275,26 +1278,41 @@ static bool isSwiftError(const Value *V) { bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { const LoadInst &LI = cast<LoadInst>(U); - if (DL->getTypeStoreSize(LI.getType()) == 0) + + unsigned StoreSize = DL->getTypeStoreSize(LI.getType()); + if (StoreSize == 0) return true; ArrayRef<Register> Regs = getOrCreateVRegs(LI); ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI); Register Base = getOrCreateVReg(*LI.getPointerOperand()); + AAMDNodes AAInfo = LI.getAAMetadata(); - Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType()); + const Value *Ptr = LI.getPointerOperand(); + Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType()); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); - if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) { + if (CLI->supportSwiftError() && isSwiftError(Ptr)) { assert(Regs.size() == 1 && "swifterror should be single pointer"); - Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), - LI.getPointerOperand()); + Register VReg = + SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr); MIRBuilder.buildCopy(Regs[0], VReg); return true; } auto &TLI = *MF->getSubtarget().getTargetLowering(); MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL); + if (AA && !(Flags & MachineMemOperand::MOInvariant)) { + if (AA->pointsToConstantMemory( + MemoryLocation(Ptr, 
LocationSize::precise(StoreSize), AAInfo))) { + Flags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + Flags |= MachineMemOperand::MODereferenceable; + } + } const MDNode *Ranges = Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; @@ -1306,7 +1324,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Align BaseAlign = getMemOpAlign(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, MRI->getType(Regs[i]), - commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges, + commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1400,7 +1418,7 @@ bool IRTranslator::translateInsertValue(const User &U, ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U); ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src); ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1)); - auto InsertedIt = InsertedRegs.begin(); + auto *InsertedIt = InsertedRegs.begin(); for (unsigned i = 0; i < DstRegs.size(); ++i) { if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end()) @@ -1563,9 +1581,9 @@ bool IRTranslator::translateGetElementPtr(const User &U, bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, unsigned Opcode) { - + const Value *SrcPtr = CI.getArgOperand(1); // If the source is undef, then just emit a nop. 
- if (isa<UndefValue>(CI.getArgOperand(1))) + if (isa<UndefValue>(SrcPtr)) return true; SmallVector<Register, 3> SrcRegs; @@ -1595,15 +1613,20 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, unsigned IsVol = cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue(); + ConstantInt *CopySize = nullptr; + if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2)); } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2)); } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); + CopySize = dyn_cast<ConstantInt>(MMI->getArgOperand(2)); } else { auto *MSI = cast<MemSetInst>(&CI); DstAlign = MSI->getDestAlign().valueOrOne(); @@ -1617,14 +1640,31 @@ bool IRTranslator::translateMemFunc(const CallInst &CI, } // Create mem operands to store the alignment and volatile info. - auto VolFlag = IsVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(0)), - MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad; + MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore; + if (IsVol) { + LoadFlags |= MachineMemOperand::MOVolatile; + StoreFlags |= MachineMemOperand::MOVolatile; + } + + AAMDNodes AAInfo = CI.getAAMetadata(); + if (AA && CopySize && + AA->pointsToConstantMemory(MemoryLocation( + SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) { + LoadFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + LoadFlags |= MachineMemOperand::MODereferenceable; + } + + ICall.addMemOperand( + MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)), + StoreFlags, 1, DstAlign, AAInfo)); if (Opcode != TargetOpcode::G_MEMSET) ICall.addMemOperand(MF->getMachineMemOperand( - MachinePointerInfo(CI.getArgOperand(1)), - MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo)); return true; } @@ -1785,7 +1825,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI, // Yes. Let's translate it. 
SmallVector<llvm::SrcOp, 4> VRegs; - for (auto &Arg : CI.args()) + for (const auto &Arg : CI.args()) VRegs.push_back(getOrCreateVReg(*Arg)); MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs, @@ -2305,7 +2345,7 @@ bool IRTranslator::translateCallBase(const CallBase &CB, SmallVector<ArrayRef<Register>, 8> Args; Register SwiftInVReg = 0; Register SwiftErrorVReg = 0; - for (auto &Arg : CB.args()) { + for (const auto &Arg : CB.args()) { if (CLI->supportSwiftError() && isSwiftError(Arg)) { assert(SwiftInVReg == 0 && "Expected only one swift error argument"); LLT Ty = getLLTForType(*Arg->getType(), *DL); @@ -2394,7 +2434,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa<FPMathOperator>(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : enumerate(CI.args())) { + for (const auto &Arg : enumerate(CI.args())) { // If this is required to be an immediate, don't materialize it in a // register. if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { @@ -2947,7 +2987,7 @@ void IRTranslator::finishPendingPhis() { for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) { auto IRPred = PI->getIncomingBlock(i); ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i)); - for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) { + for (auto *Pred : getMachinePredBBs({IRPred, PI->getParent()})) { if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred)) continue; SeenPreds.insert(Pred); @@ -3347,10 +3387,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { TM.resetTargetOptions(F); EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F); FuncInfo.MF = MF; - if (EnableOpts) + if (EnableOpts) { + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); - else + } else { + AA = nullptr; FuncInfo.BPI = nullptr; + } FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF); diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 95ae8383b6fa..e0357c50e555 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -332,6 +332,8 @@ bool InlineAsmLowering::lowerInlineAsm( } ++ResNo; } else { + assert(OpInfo.Type != InlineAsm::isLabel && + "GlobalISel currently doesn't support callbr"); OpInfo.ConstraintVT = MVT::Other; } @@ -427,7 +429,8 @@ bool InlineAsmLowering::lowerInlineAsm( } break; - case InlineAsm::isInput: { + case InlineAsm::isInput: + case InlineAsm::isLabel: { if (OpInfo.isMatchingInputConstraint()) { unsigned DefIdx = OpInfo.getMatchedOperand(); // Find operand with register def that corresponds to DefIdx. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index fb046d519ac8..52ee13757f27 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2393,30 +2393,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_FCONSTANT: { + // To avoid changing the bits of the constant due to extension to a larger + // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT. 
MachineOperand &SrcMO = MI.getOperand(1); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - APFloat Val = SrcMO.getFPImm()->getValueAPF(); - bool LosesInfo; - switch (WideTy.getSizeInBits()) { - case 32: - Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, - &LosesInfo); - break; - case 64: - Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, - &LosesInfo); - break; - default: - return UnableToLegalize; - } - - assert(!LosesInfo && "extend should always be lossless"); - - Observer.changingInstr(MI); - SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); - - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); - Observer.changedInstr(MI); + APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt(); + MIRBuilder.setInstrAndDebugLoc(MI); + auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val); + widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC); + MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_IMPLICIT_DEF: { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index d4fbf7d15089..be1bc865d1e1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -298,7 +298,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) { const auto &LegalSizes = LegalStoreSizes[AS]; #ifndef NDEBUG - for (auto StoreMI : StoresToMerge) + for (auto *StoreMI : StoresToMerge) assert(MRI->getType(StoreMI->getValueReg()) == OrigTy); #endif @@ -366,7 +366,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { // directly. Otherwise, we need to generate some instructions to merge the // existing values together into a wider type. 
SmallVector<APInt, 8> ConstantVals; - for (auto Store : Stores) { + for (auto *Store : Stores) { auto MaybeCst = getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI); if (!MaybeCst) { @@ -415,7 +415,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) { return R; }); - for (auto MI : Stores) + for (auto *MI : Stores) InstsToErase.insert(MI); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 0d9580e25606..2e22dae35e5a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -646,7 +646,7 @@ MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, SmallVector<SrcOp> TmpVec; TmpVec.reserve(Ops.size()); LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); - for (auto &Op : Ops) + for (const auto &Op : Ops) TmpVec.push_back(buildConstant(EltTy, Op)); return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 7781761bc131..013c8700e8ae 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -228,7 +228,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, return false; // Instructions without side-effects are dead iff they only define dead vregs. 
- for (auto &MO : MI.operands()) { + for (const auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp index 67d6a3df7807..258ad1931b12 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp @@ -332,7 +332,7 @@ void HardwareLoop::Create() { // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. - for (auto I : L->blocks()) + for (auto *I : L->blocks()) DeleteDeadPHIs(I); } @@ -407,13 +407,13 @@ Value *HardwareLoop::InitLoopCount() { BasicBlock *Predecessor = BB->getSinglePredecessor(); // If it's not safe to create a while loop then don't force it and create a // do-while loop instead - if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE)) + if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator())) UseLoopGuard = false; else BB = Predecessor; } - if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) { LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " << *ExitCount << "\n"); return nullptr; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp index fc97938ccd3e..da6ec76bd770 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -758,7 +758,7 @@ void ImplicitNullChecks::rewriteNullChecks( ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) { DebugLoc DL; - for (auto &NC : NullCheckList) { + for (const auto &NC : NullCheckList) { // Remove the conditional branch dependent on the null check. 
unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock()); (void)BranchesRemoved; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index 06c660807c5c..3ea1d6c7f1ef 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ -86,7 +86,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -140,7 +139,6 @@ public: VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), LSS(pass.getAnalysis<LiveStacks>()), - AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(pass.getAnalysis<MachineDominatorTree>()), Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), @@ -159,7 +157,6 @@ class InlineSpiller : public Spiller { MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; - AliasAnalysis *AA; MachineDominatorTree &MDT; MachineLoopInfo &Loops; VirtRegMap &VRM; @@ -200,7 +197,6 @@ public: VirtRegAuxInfo &VRAI) : MF(MF), LIS(Pass.getAnalysis<LiveIntervals>()), LSS(Pass.getAnalysis<LiveStacks>()), - AA(&Pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), MDT(Pass.getAnalysis<MachineDominatorTree>()), Loops(Pass.getAnalysis<MachineLoopInfo>()), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), @@ -659,7 +655,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. 
void InlineSpiller::reMaterializeAll() { - if (!Edit->anyRematerializable(AA)) + if (!Edit->anyRematerializable()) return; UsedValues.clear(); @@ -702,7 +698,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; LLVM_DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions // after rematerialization. To remove a VNI for a vreg from its LiveInterval, @@ -1180,7 +1176,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { LLVM_DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1298,7 +1294,7 @@ void HoistSpillHelper::rmRedundantSpills( // For each spill saw, check SpillBBToSpill[] and see if its BB already has // another spill inside. If a BB contains more than one spill, only keep the // earlier spill with smaller SlotIndex. - for (const auto CurrentSpill : Spills) { + for (auto *const CurrentSpill : Spills) { MachineBasicBlock *Block = CurrentSpill->getParent(); MachineDomTreeNode *Node = MDT.getBase().getNode(Block); MachineInstr *PrevSpill = SpillBBToSpill[Node]; @@ -1313,7 +1309,7 @@ void HoistSpillHelper::rmRedundantSpills( SpillBBToSpill[MDT.getBase().getNode(Block)] = CurrentSpill; } } - for (const auto SpillToRm : SpillsToRm) + for (auto *const SpillToRm : SpillsToRm) Spills.erase(SpillToRm); } @@ -1347,7 +1343,7 @@ void HoistSpillHelper::getVisitOrders( // the path starting from the first node with non-redundant spill to the Root // node will be added to the WorkSet, which will contain all the possible // locations where spills may be hoisted to after the loop below is done. 
- for (const auto Spill : Spills) { + for (auto *const Spill : Spills) { MachineBasicBlock *Block = Spill->getParent(); MachineDomTreeNode *Node = MDT[Block]; MachineInstr *SpillToRm = nullptr; @@ -1492,7 +1488,7 @@ void HoistSpillHelper::runHoistSpills( : BranchProbability(1, 1); if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) { // Hoist: Move spills to current Block. - for (const auto SpillBB : SpillsInSubTree) { + for (auto *const SpillBB : SpillsInSubTree) { // When SpillBB is a BB contains original spill, insert the spill // to SpillsToRm. if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() && @@ -1609,7 +1605,7 @@ void HoistSpillHelper::hoistAllSpills() { // Remove redundant spills or change them to dead instructions. NumSpills -= SpillsToRm.size(); - for (auto const RMEnt : SpillsToRm) { + for (auto *const RMEnt : SpillsToRm) { RMEnt->setDesc(TII.get(TargetOpcode::KILL)); for (unsigned i = RMEnt->getNumOperands(); i; --i) { MachineOperand &MO = RMEnt->getOperand(i - 1); @@ -1617,7 +1613,7 @@ void HoistSpillHelper::hoistAllSpills() { RMEnt->removeOperand(i - 1); } } - Edit.eliminateDeadDefs(SpillsToRm, None, AA); + Edit.eliminateDeadDefs(SpillsToRm, None); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 55f3ad796291..0582378be4cd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -541,7 +541,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { Changed |= lowerInterleavedStore(SI, DeadInsts); } - for (auto I : DeadInsts) + for (auto *I : DeadInsts) I->eraseFromParent(); return Changed; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 43858071025a..a0f304659bca 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -528,8 +528,8 @@ public: if (B.size() != o.B.size()) return false; - auto ob = o.B.begin(); - for (auto &b : B) { + auto *ob = o.B.begin(); + for (const auto &b : B) { if (b != *ob) return false; ob++; @@ -1154,7 +1154,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // Test if all participating instruction will be dead after the // transformation. If intermediate results are used, no performance gain can // be expected. Also sum the cost of the Instructions beeing left dead. - for (auto &I : Is) { + for (const auto &I : Is) { // Compute the old cost InstructionCost += TTI.getInstructionCost(I, CostKind); @@ -1182,7 +1182,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // that the corresponding defining access dominates first LI. This guarantees // that there are no aliasing stores in between the loads. auto FMA = MSSA.getMemoryAccess(First); - for (auto LI : LIs) { + for (auto *LI : LIs) { auto MADef = MSSA.getMemoryAccess(LI)->getDefiningAccess(); if (!MSSA.dominates(MADef, FMA)) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 43c12c67939e..ef49d3888f2b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -284,7 +284,7 @@ public: // Initialized the preferred-location map with illegal locations, to be // filled in later. - for (auto &VLoc : VLocs) + for (const auto &VLoc : VLocs) if (VLoc.second.Kind == DbgValue::Def) ValueToLoc.insert({VLoc.second.ID, LocIdx::MakeIllegalLoc()}); @@ -507,7 +507,7 @@ public: // date. 
Wipe old tracking data for the location if it's been clobbered in // the meantime. if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) { - for (auto &P : ActiveMLocs[NewLoc]) { + for (const auto &P : ActiveMLocs[NewLoc]) { ActiveVLocs.erase(P); } ActiveMLocs[NewLoc.asU64()].clear(); @@ -560,7 +560,7 @@ public: // explicitly undef, then stop here. if (!NewLoc && !MakeUndef) { // Try and recover a few more locations with entry values. - for (auto &Var : ActiveMLocIt->second) { + for (const auto &Var : ActiveMLocIt->second) { auto &Prop = ActiveVLocs.find(Var)->second.Properties; recoverAsEntryValue(Var, Prop, OldValue); } @@ -570,7 +570,7 @@ public: // Examine all the variables based on this location. DenseSet<DebugVariable> NewMLocs; - for (auto &Var : ActiveMLocIt->second) { + for (const auto &Var : ActiveMLocIt->second) { auto ActiveVLocIt = ActiveVLocs.find(Var); // Re-state the variable location: if there's no replacement then NewLoc // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE @@ -623,7 +623,7 @@ public: VarLocs[Dst.asU64()] = VarLocs[Src.asU64()]; // For each variable based on Src; create a location at Dst. - for (auto &Var : MovingVars) { + for (const auto &Var : MovingVars) { auto ActiveVLocIt = ActiveVLocs.find(Var); assert(ActiveVLocIt != ActiveVLocs.end()); ActiveVLocIt->second.Loc = Dst; @@ -1224,7 +1224,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI, // FIXME: no index for this? 
Register Reg = MTracker->LocIdxToLocID[L]; const TargetRegisterClass *TRC = nullptr; - for (auto *TRCI : TRI->regclasses()) + for (const auto *TRCI : TRI->regclasses()) if (TRCI->contains(Reg)) TRC = TRCI; assert(TRC && "Couldn't find target register class?"); @@ -1454,7 +1454,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { for (uint32_t DeadReg : DeadRegs) MTracker->defReg(DeadReg, CurBB, CurInst); - for (auto *MO : RegMaskPtrs) + for (const auto *MO : RegMaskPtrs) MTracker->writeRegMask(MO, CurBB, CurInst); // If this instruction writes to a spill slot, def that slot. @@ -1493,7 +1493,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { if (IgnoreSPAlias(Reg)) continue; - for (auto *MO : RegMaskPtrs) + for (const auto *MO : RegMaskPtrs) if (MO->clobbersPhysReg(Reg)) TTracker->clobberMloc(L.Idx, MI.getIterator(), false); } @@ -1822,7 +1822,7 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { // Otherwise, examine all other seen fragments for this variable, with "this" // fragment being a previously unseen fragment. Record any pair of // overlapping fragments. - for (auto &ASeenFragment : AllSeenFragments) { + for (const auto &ASeenFragment : AllSeenFragments) { // Does this previously seen fragment overlap? if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { // Yes: Mark the current fragment as being overlapped. @@ -1993,7 +1993,7 @@ bool InstrRefBasedLDV::mlocJoin( // redundant PHI that we can eliminate. SmallVector<const MachineBasicBlock *, 8> BlockOrders; - for (auto Pred : MBB.predecessors()) + for (auto *Pred : MBB.predecessors()) BlockOrders.push_back(Pred); // Visit predecessors in RPOT order. @@ -2313,7 +2313,7 @@ void InstrRefBasedLDV::buildMLocValueMap( // All successors should be visited: put any back-edges on the pending // list for the next pass-through, and any other successors to be // visited this pass, if they're not going to be already. 
- for (auto s : MBB->successors()) { + for (auto *s : MBB->successors()) { // Does branching to this successor represent a back-edge? if (BBToOrder[s] > BBToOrder[MBB]) { // No: visit it during this dataflow iteration. @@ -2367,7 +2367,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( if (BlockOrders.empty()) return None; - for (auto p : BlockOrders) { + for (const auto *p : BlockOrders) { unsigned ThisBBNum = p->getNumber(); auto OutValIt = LiveOuts.find(p); if (OutValIt == LiveOuts.end()) @@ -2422,7 +2422,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( // Check that all properties are the same. We can't pick a location if they're // not. const DbgValueProperties *Properties0 = Properties[0]; - for (auto *Prop : Properties) + for (const auto *Prop : Properties) if (*Prop != *Properties0) return None; @@ -2472,7 +2472,7 @@ bool InstrRefBasedLDV::vlocJoin( SmallVector<InValueT, 8> Values; bool Bail = false; int BackEdgesStart = 0; - for (auto p : BlockOrders) { + for (auto *p : BlockOrders) { // If the predecessor isn't in scope / to be explored, we'll never be // able to join any locations. if (!BlocksToExplore.contains(p)) { @@ -2577,7 +2577,7 @@ void InstrRefBasedLDV::getBlocksForScope( // instructions in scope at all. To accurately replicate VarLoc // LiveDebugValues, this means exploring all artificial successors too. // Perform a depth-first-search to enumerate those blocks. - for (auto *MBB : BlocksToExplore) { + for (const auto *MBB : BlocksToExplore) { // Depth-first-search state: each node is a block and which successor // we're currently exploring. SmallVector<std::pair<const MachineBasicBlock *, @@ -2662,7 +2662,7 @@ void InstrRefBasedLDV::buildVLocValueMap( MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB)); // Picks out relevants blocks RPO order and sort them. 
- for (auto *MBB : BlocksToExplore) + for (const auto *MBB : BlocksToExplore) BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB)); llvm::sort(BlockOrders, Cmp); @@ -2696,7 +2696,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // between blocks. This keeps the locality of working on one lexical scope at // at time, but avoids re-processing variable values because some other // variable has been assigned. - for (auto &Var : VarsWeCareAbout) { + for (const auto &Var : VarsWeCareAbout) { // Re-initialize live-ins and live-outs, to clear the remains of previous // variables live-ins / live-outs. for (unsigned int I = 0; I < NumBlocks; ++I) { @@ -2823,7 +2823,7 @@ void InstrRefBasedLDV::buildVLocValueMap( // We should visit all successors. Ensure we'll visit any non-backedge // successors during this dataflow iteration; book backedge successors // to be visited next time around. - for (auto s : MBB->successors()) { + for (auto *s : MBB->successors()) { // Ignore out of scope / not-to-be-explored successors. 
if (LiveInIdx.find(s) == LiveInIdx.end()) continue; @@ -2906,7 +2906,7 @@ void InstrRefBasedLDV::placePHIsForSingleVarDefinition( #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void InstrRefBasedLDV::dump_mloc_transfer( const MLocTransferMap &mloc_transfer) const { - for (auto &P : mloc_transfer) { + for (const auto &P : mloc_transfer) { std::string foo = MTracker->LocIdxToName(P.first); std::string bar = MTracker->IDAsString(P.second); dbgs() << "Loc " << foo << " --> " << bar << "\n"; @@ -2993,7 +2993,7 @@ void InstrRefBasedLDV::makeDepthFirstEjectionMap( if (DILocationIt != ScopeToDILocation.end()) { getBlocksForScope(DILocationIt->second, BlocksToExplore, ScopeToAssignBlocks.find(WS)->second); - for (auto *MBB : BlocksToExplore) { + for (const auto *MBB : BlocksToExplore) { unsigned BBNum = MBB->getNumber(); if (EjectionMap[BBNum] == 0) EjectionMap[BBNum] = WS->getDFSOut(); @@ -3100,7 +3100,7 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( getBlocksForScope(DILocationIt->second, BlocksToExplore, ScopeToAssignBlocks.find(WS)->second); - for (auto *MBB : BlocksToExplore) + for (const auto *MBB : BlocksToExplore) if (WS->getDFSOut() == EjectionMap[MBB->getNumber()]) EjectBlock(const_cast<MachineBasicBlock &>(*MBB)); @@ -3709,10 +3709,9 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( for (auto &PHI : CreatedPHIs) SortedPHIs.push_back(PHI); - std::sort( - SortedPHIs.begin(), SortedPHIs.end(), [&](LDVSSAPhi *A, LDVSSAPhi *B) { - return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; - }); + llvm::sort(SortedPHIs, [&](LDVSSAPhi *A, LDVSSAPhi *B) { + return BBToOrder[&A->getParent()->BB] < BBToOrder[&B->getParent()->BB]; + }); for (auto &PHI : SortedPHIs) { ValueIDNum ThisBlockValueNum = diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 24c00b8a10ec..32e07eb77efe 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1874,7 +1874,7 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI, // Otherwise, examine all other seen fragments for this variable, with "this" // fragment being a previously unseen fragment. Record any pair of // overlapping fragments. - for (auto &ASeenFragment : AllSeenFragments) { + for (const auto &ASeenFragment : AllSeenFragments) { // Does this previously seen fragment overlap? if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) { // Yes: Mark the current fragment as being overlapped. @@ -1922,7 +1922,7 @@ bool VarLocBasedLDV::join( // For all predecessors of this MBB, find the set of VarLocs that // can be joined. int NumVisited = 0; - for (auto p : MBB.predecessors()) { + for (auto *p : MBB.predecessors()) { // Ignore backedges if we have not visited the predecessor yet. As the // predecessor hasn't yet had locations propagated into it, most locations // will not yet be valid, so treat them as all being uninitialized and @@ -2246,7 +2246,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, if (OLChanged) { OLChanged = false; - for (auto s : MBB->successors()) + for (auto *s : MBB->successors()) if (OnPending.insert(s).second) { Pending.push(BBToOrder[s]); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index 35cf25330186..574c0f98161e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1891,7 +1891,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { // insert position, insert all instructions at the same SlotIdx. They are // guaranteed to appear in-sequence in StashedDebugInstrs because we insert // them in order. 
- for (auto StashIt = StashedDebugInstrs.begin(); + for (auto *StashIt = StashedDebugInstrs.begin(); StashIt != StashedDebugInstrs.end(); ++StashIt) { SlotIndex Idx = StashIt->Idx; MachineBasicBlock *MBB = StashIt->MBB; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 1242ce20b732..8a76048bb8c4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveVariables.h" @@ -60,9 +59,8 @@ using namespace llvm; char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; -INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", + false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", @@ -87,8 +85,6 @@ cl::opt<bool> UseSegmentSetForPhysRegs( void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addRequiredTransitiveID(MachineDominatorsID); @@ -126,7 +122,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ 
-1417,7 +1412,7 @@ private: NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI); OldIdxVNI->def = NewIdxDef; // Modify subsequent segments to be defined by the moved def OldIdxVNI. - for (auto Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) + for (auto *Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) Idx->valno = OldIdxVNI; // Aggressively remove all dead flags from the former dead definition. // Kill/dead flags shouldn't be used while live intervals exist; they @@ -1662,7 +1657,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, ArrayRef<Register> OrigRegs) { // Find anchor points, which are at the beginning/end of blocks or at // instructions that already have indexes. - while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin)) + while (Begin != MBB->begin() && !Indexes->hasIndex(*std::prev(Begin))) --Begin; while (End != MBB->end() && !Indexes->hasIndex(*End)) ++End; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index 58eb4110f153..2aafb746aa2c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -68,17 +68,16 @@ Register LiveRangeEdit::createFrom(Register OldReg) { } bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, - const MachineInstr *DefMI, - AAResults *aa) { + const MachineInstr *DefMI) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; - if (!TII.isTriviallyReMaterializable(*DefMI, aa)) + if (!TII.isTriviallyReMaterializable(*DefMI)) return false; Remattable.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(AAResults *aa) { +void LiveRangeEdit::scanRemattable() { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; @@ -90,14 +89,14 @@ void LiveRangeEdit::scanRemattable(AAResults *aa) { MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; - checkRematerializable(OrigVNI, DefMI, aa); + 
checkRematerializable(OrigVNI, DefMI); } ScannedRemattable = true; } -bool LiveRangeEdit::anyRematerializable(AAResults *aa) { +bool LiveRangeEdit::anyRematerializable() { if (!ScannedRemattable) - scanRemattable(aa); + scanRemattable(); return !Remattable.empty(); } @@ -274,8 +273,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, } /// Find all live intervals that need to shrink, then remove the instruction. -void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, - AAResults *AA) { +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { assert(MI->allDefsAreDead() && "Def isn't really dead"); SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); @@ -384,7 +382,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // register uses. That may provoke RA to split an interval at the KILL // and later result in an invalid live segment end. if (isOrigDef && DeadRemats && !HasLiveVRegUses && - TII.isTriviallyReMaterializable(*MI, AA)) { + TII.isTriviallyReMaterializable(*MI)) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); @@ -414,14 +412,13 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, - ArrayRef<Register> RegsBeingSpilled, - AAResults *AA) { + ArrayRef<Register> RegsBeingSpilled) { ToShrinkSet ToShrink; for (;;) { // Erase all dead defs. 
while (!Dead.empty()) - eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA); + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp index 94bdfab5e5e0..40250171fe1e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp @@ -758,8 +758,7 @@ void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, /// removeVirtualRegistersKilled - Remove all killed info for the specified /// instruction. void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isKill()) { MO.setIsKill(false); Register Reg = MO.getReg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp index 984dc452fbfd..a517ee3794ca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -78,7 +78,7 @@ bool LowerEmuTLS::runOnModule(Module &M) { if (G.isThreadLocal()) TlsVars.append({&G}); } - for (const auto G : TlsVars) + for (const auto *const G : TlsVars) Changed |= addEmuTlsVar(M, G); return Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index eea24d8e9353..3e7b4dbc9d71 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -129,7 +129,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, // Calculates the distance of MI from the beginning of its parent BB. 
auto getInstrIdx = [](const MachineInstr &MI) { unsigned i = 0; - for (auto &CurMI : *MI.getParent()) { + for (const auto &CurMI : *MI.getParent()) { if (&CurMI == &MI) return i; i++; @@ -416,7 +416,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); VRegRenamer Renamer(MRI); - for (auto MBB : RPOList) + for (auto *MBB : RPOList) Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); return Changed; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 0c94e1f7e474..e3d6b59c5077 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -3383,7 +3383,7 @@ static void initSlots2BasicBlocks( DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); MST.incorporateFunction(F); - for (auto &BB : F) { + for (const auto &BB : F) { if (BB.hasName()) continue; int Slot = MST.getLocalSlot(&BB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 4944cb46c5b5..aa9522bc3459 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -437,7 +437,7 @@ void MIRParserImpl::setupDebugValueTracking( MF.setDebugInstrNumberingCount(MaxInstrNum); // Load any substitutions. 
- for (auto &Sub : YamlMF.DebugValueSubstitutions) { + for (const auto &Sub : YamlMF.DebugValueSubstitutions) { MF.makeDebugValueSubstitution({Sub.SrcInst, Sub.SrcOp}, {Sub.DstInst, Sub.DstOp}, Sub.Subreg); } @@ -975,7 +975,7 @@ bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS, bool MIRParserImpl::parseMachineMetadataNodes( PerFunctionMIParsingState &PFS, MachineFunction &MF, const yaml::MachineFunction &YMF) { - for (auto &MDS : YMF.MachineMetadataNodes) { + for (const auto &MDS : YMF.MachineMetadataNodes) { if (parseMachineMetadata(PFS, MDS)) return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp index 7daf9025d303..d21d552227cf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp @@ -13,10 +13,9 @@ #include "AllocationOrder.h" #include "RegAllocEvictionAdvisor.h" #include "RegAllocGreedy.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MLModelRunner.h" #include "llvm/Analysis/TensorSpec.h" -#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) +#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API) #include "llvm/Analysis/ModelUnderTrainingRunner.h" #include "llvm/Analysis/NoInferenceModelRunner.h" #endif @@ -91,7 +90,6 @@ public: AU.setPreservesAll(); AU.addRequired<RegAllocEvictionAdvisorAnalysis>(); AU.addRequired<MachineBlockFrequencyInfo>(); - AU.addRequired<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -891,9 +889,7 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) { &getAnalysis<RegAllocEvictionAdvisorAnalysis>())) if (auto *Log = DevModeAnalysis->getLogger(MF)) Log->logFloatFinalReward(static_cast<float>( - calculateRegAllocScore( - MF, getAnalysis<MachineBlockFrequencyInfo>(), - 
getAnalysis<AAResultsWrapperPass>().getAAResults()) + calculateRegAllocScore(MF, getAnalysis<MachineBlockFrequencyInfo>()) .getScore())); return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp index 02c44fa85cd9..7381c7e6b09c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1436,7 +1436,7 @@ MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { // ditribute the complemental of the sum to each unknown probability. unsigned KnownProbNum = 0; auto Sum = BranchProbability::getZero(); - for (auto &P : Probs) { + for (const auto &P : Probs) { if (!P.isUnknown()) { Sum += P; KnownProbNum++; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4cc84f22bdde..9ff5c37627b4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -965,7 +965,7 @@ bool MachineBlockPlacement::isTrellis( for (MachineBasicBlock *Succ : ViableSuccs) { int PredCount = 0; - for (auto SuccPred : Succ->predecessors()) { + for (auto *SuccPred : Succ->predecessors()) { // Allow triangle successors, but don't count them. if (Successors.count(SuccPred)) { // Make sure that it is actually a triangle. @@ -1063,7 +1063,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( // Collect the edge frequencies of all edges that form the trellis. 
SmallVector<WeightedEdge, 8> Edges[2]; int SuccIndex = 0; - for (auto Succ : ViableSuccs) { + for (auto *Succ : ViableSuccs) { for (MachineBasicBlock *SuccPred : Succ->predecessors()) { // Skip any placed predecessors that are not BB if (SuccPred != BB) @@ -2451,7 +2451,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( // as the sum of frequencies of exit edges we collect here, excluding the exit // edge from the tail of the loop chain. SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; - for (auto BB : LoopChain) { + for (auto *BB : LoopChain) { auto LargestExitEdgeProb = BranchProbability::getZero(); for (auto *Succ : BB->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; @@ -2561,7 +2561,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { // profile data is available. if (F->getFunction().hasProfileData() || ForceLoopColdBlock) { BlockFrequency LoopFreq(0); - for (auto LoopPred : L.getHeader()->predecessors()) + for (auto *LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) LoopFreq += MBFI->getBlockFreq(LoopPred) * MBPI->getEdgeProbability(LoopPred, L.getHeader()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp index e60fd9f7883a..c6756b1d3737 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp @@ -415,7 +415,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) // FIXME: we should be able to hoist loads with no other side effects if // there are no other instructions which can change memory in this loop. // This is a trivial form of alias analysis. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp index 722a709af240..57e2cd20bdd0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp @@ -92,6 +92,7 @@ private: bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); + bool isTransientMI(const MachineInstr *MI); unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace); @@ -158,6 +159,43 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { return DefInstr; } +/// Return true if MI is unlikely to generate an actual target instruction. +bool MachineCombiner::isTransientMI(const MachineInstr *MI) { + if (!MI->isCopy()) + return MI->isTransient(); + + // If MI is a COPY, check if its src and dst registers can be coalesced. + Register Dst = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + if (!MI->isFullCopy()) { + // If src RC contains super registers of dst RC, it can also be coalesced. + if (MI->getOperand(0).getSubReg() || Src.isPhysical() || Dst.isPhysical()) + return false; + + auto SrcSub = MI->getOperand(1).getSubReg(); + auto SrcRC = MRI->getRegClass(Src); + auto DstRC = MRI->getRegClass(Dst); + return TRI->getMatchingSuperRegClass(SrcRC, DstRC, SrcSub) != nullptr; + } + + if (Src.isPhysical() && Dst.isPhysical()) + return Src == Dst; + + if (Src.isVirtual() && Dst.isVirtual()) { + auto SrcRC = MRI->getRegClass(Src); + auto DstRC = MRI->getRegClass(Dst); + return SrcRC->hasSuperClassEq(DstRC) || SrcRC->hasSubClassEq(DstRC); + } + + if (Src.isVirtual()) + std::swap(Src, Dst); + + // Now Src is physical register, Dst is virtual register. 
+ auto DstRC = MRI->getRegClass(Dst); + return DstRC->contains(Src); +} + /// Computes depth of instructions in vector \InsInstr. /// /// \param InsInstrs is a vector of machine instructions @@ -204,9 +242,10 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, MachineInstr *DefInstr = getOperandDef(MO); if (DefInstr) { DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; - LatencyOp = TSchedModel.computeOperandLatency( - DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), - InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + if (!isTransientMI(DefInstr)) + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); } } IDepth = std::max(IDepth, DepthOp + LatencyOp); @@ -305,7 +344,7 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences( NewRootLatency += getLatency(&MI, NewRoot, BlockTrace); unsigned RootLatency = 0; - for (auto I : DelInstrs) + for (auto *I : DelInstrs) RootLatency += TSchedModel.computeInstrLatency(I); return {NewRootLatency, RootLatency}; @@ -488,7 +527,7 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, for (auto *InstrPtr : DelInstrs) { InstrPtr->eraseFromParent(); // Erase all LiveRegs defined by the removed instruction - for (auto I = RegUnits.begin(); I != RegUnits.end(); ) { + for (auto *I = RegUnits.begin(); I != RegUnits.end();) { if (I->MI == InstrPtr) I = RegUnits.erase(I); else diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp index ca5936a14779..f0190812389f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -127,7 +127,7 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { BV.set(*CSR); // Saved CSRs are not pristine. 
- for (auto &I : getCalleeSavedInfo()) + for (const auto &I : getCalleeSavedInfo()) for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) BV.reset(*S); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index f58996ea90c6..6b481a374382 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -911,8 +911,8 @@ static const MachineInstr *getCallInstr(const MachineInstr *MI) { if (!MI->isBundle()) return MI; - for (auto &BMI : make_range(getBundleStart(MI->getIterator()), - getBundleEnd(MI->getIterator()))) + for (const auto &BMI : make_range(getBundleStart(MI->getIterator()), + getBundleEnd(MI->getIterator()))) if (BMI.isCandidateForCallSiteEntry()) return &BMI; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 867a7ed584b2..3e1aace855a5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -146,7 +146,7 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { return X.getSectionID().Type < Y.getSectionID().Type; }; llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); - + llvm::avoidZeroOffsetLandingPad(MF); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 31f45e194a97..e92dec5bea48 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -1203,7 +1203,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const { // destination. The check for isInvariantLoad gives the target the chance to // classify the load as always returning a constant, e.g. a constant pool // load. 
- if (mayLoad() && !isDereferenceableInvariantLoad(AA)) + if (mayLoad() && !isDereferenceableInvariantLoad()) // Otherwise, this is a real load. If there is a store between the load and // end of block, we can't move it. return !SawStore; @@ -1348,7 +1348,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { /// isDereferenceableInvariantLoad - Return true if this instruction will never /// trap and is loading from a location whose value is invariant across a run of /// this function. -bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { +bool MachineInstr::isDereferenceableInvariantLoad() const { // If the instruction doesn't load at all, it isn't an invariant load. if (!mayLoad()) return false; @@ -1374,12 +1374,6 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { if (PSV->isConstant(&MFI)) continue; - } else if (const Value *V = MMO->getValue()) { - // If we have an AliasAnalysis, ask it whether the memory is constant. - if (AA && - AA->pointsToConstantMemory( - MemoryLocation(V, MMO->getSize(), MMO->getAAInfo()))) - continue; } // Otherwise assume conservatively. 
@@ -2273,7 +2267,7 @@ using MMOList = SmallVector<const MachineMemOperand *, 2>; static unsigned getSpillSlotSize(const MMOList &Accesses, const MachineFrameInfo &MFI) { unsigned Size = 0; - for (auto A : Accesses) + for (const auto *A : Accesses) if (MFI.isSpillSlotObjectIndex( cast<FixedStackPseudoSourceValue>(A->getPseudoValue()) ->getFrameIndex())) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index 00d75f8231c7..df7b6c782b91 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -230,8 +230,7 @@ namespace { bool IsGuaranteedToExecute(MachineBasicBlock *BB); - bool isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const; + bool isTriviallyReMaterializable(const MachineInstr &MI) const; void EnterScope(MachineBasicBlock *MBB); @@ -666,9 +665,9 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { /// virtual register uses. Even though rematerializable RA might not actually /// rematerialize it in this scenario. In that case we do not want to hoist such /// instruction out of the loop in a belief RA will sink it back if needed. -bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool MachineLICMBase::isTriviallyReMaterializable( + const MachineInstr &MI) const { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) { @@ -1174,7 +1173,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { // Rematerializable instructions should always be hoisted providing the // register allocator can just pull them down again when needed. 
- if (isTriviallyReMaterializable(MI, AA)) + if (isTriviallyReMaterializable(MI)) return true; // FIXME: If there are long latency loop-invariant instructions inside the @@ -1227,8 +1226,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { // High register pressure situation, only hoist if the instruction is going // to be remat'ed. - if (!isTriviallyReMaterializable(MI, AA) && - !MI.isDereferenceableInvariantLoad(AA)) { + if (!isTriviallyReMaterializable(MI) && + !MI.isDereferenceableInvariantLoad()) { LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } @@ -1247,7 +1246,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->isDereferenceableInvariantLoad(AA)) + if (!MI->isDereferenceableInvariantLoad()) return nullptr; // Next determine the register class for a temporary register. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index 8d500398f55e..52501ca7c871 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -219,7 +219,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); RegClassInfo.runOnMachineFunction(*MF); - for (auto &L : *MLI) + for (const auto &L : *MLI) scheduleLoop(*L); return false; @@ -231,7 +231,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { /// the loop. 
bool MachinePipeliner::scheduleLoop(MachineLoop &L) { bool Changed = false; - for (auto &InnerLoop : L) + for (const auto &InnerLoop : L) Changed |= scheduleLoop(*InnerLoop); #ifndef NDEBUG @@ -689,7 +689,7 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { Worklist.push_back(SUa); while (!Worklist.empty()) { const SUnit *SU = Worklist.pop_back_val(); - for (auto &SI : SU->Succs) { + for (const auto &SI : SU->Succs) { SUnit *SuccSU = SI.getSUnit(); if (SI.getKind() == SDep::Order) { if (Visited.count(SuccSU)) @@ -706,11 +706,11 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) { /// Return true if the instruction causes a chain between memory /// references before and after it. -static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) { +static bool isDependenceBarrier(MachineInstr &MI) { return MI.isCall() || MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects() || (MI.hasOrderedMemoryRef() && - (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA))); + (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad())); } /// Return the underlying objects for the memory references of an instruction. 
@@ -743,14 +743,14 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); - if (isDependenceBarrier(MI, AA)) + if (isDependenceBarrier(MI)) PendingLoads.clear(); else if (MI.mayLoad()) { SmallVector<const Value *, 4> Objs; ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); - for (auto V : Objs) { + for (const auto *V : Objs) { SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; SUs.push_back(&SU); } @@ -759,12 +759,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { ::getUnderlyingObjects(&MI, Objs); if (Objs.empty()) Objs.push_back(UnknownValue); - for (auto V : Objs) { + for (const auto *V : Objs) { MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I = PendingLoads.find(V); if (I == PendingLoads.end()) continue; - for (auto Load : I->second) { + for (auto *Load : I->second) { if (isSuccOrder(Load, &SU)) continue; MachineInstr &LdMI = *Load->getInstr(); @@ -1407,8 +1407,8 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG); // Add the artificial dependencies if it does not form a cycle. 
- for (auto I : UseSUs) { - for (auto Src : SrcSUs) { + for (auto *I : UseSUs) { + for (auto *Src : SrcSUs) { if (!SDAG->Topo.IsReachable(I, Src) && Src != I) { Src->addPred(SDep(I, SDep::Artificial)); SDAG->Topo.AddPred(Src, I); @@ -1878,7 +1878,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) { Order = TopDown; LLVM_DEBUG(dbgs() << " Top down (intersect) "); } else if (NodeSets.size() == 1) { - for (auto &N : Nodes) + for (const auto &N : Nodes) if (N->Succs.size() == 0) R.insert(N); Order = BottomUp; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index 5f80445a5a34..96131dc2983e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -1698,7 +1698,7 @@ void BaseMemOpClusterMutation::collectMemOpRecords( << ", Width: " << Width << "\n"); } #ifndef NDEBUG - for (auto *Op : BaseOps) + for (const auto *Op : BaseOps) assert(Op); #endif } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 006ba9273dfb..0568bc6a4600 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -446,7 +446,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MadeChange |= ProcessBlock(MBB); // If we have anything we marked as toSplit, split it now. 
- for (auto &Pair : ToSplit) { + for (const auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp index a85dbf1de1ee..b546a5082b07 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp @@ -200,7 +200,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { SmallVector<stable_hash> HashComponents; // TODO: Hash more stuff like block alignment and branch probabilities. - for (auto &MI : MBB) + for (const auto &MI : MBB) HashComponents.push_back(stableHashValue(MI)); return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); @@ -209,7 +209,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { stable_hash llvm::stableHashValue(const MachineFunction &MF) { SmallVector<stable_hash> HashComponents; // TODO: Hash lots more stuff like function alignment and stack objects. - for (auto &MBB : MF) + for (const auto &MBB : MF) HashComponents.push_back(stableHashValue(MBB)); return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 0a5ff276fedc..715e5da26989 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -484,7 +484,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. 
Bounds.Downward = false; Bounds.Visited.clear(); - for (auto I : inverse_post_order_ext(MBB, Bounds)) { + for (const auto *I : inverse_post_order_ext(MBB, Bounds)) { LLVM_DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. @@ -502,7 +502,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - for (auto I : post_order_ext(MBB, Bounds)) { + for (const auto *I : post_order_ext(MBB, Bounds)) { LLVM_DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index 7a008bae726e..93e68918b632 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2802,8 +2802,8 @@ void MachineVerifier::visitMachineFunctionAfter() { // tracking numbers. if (MF->getFunction().getSubprogram()) { DenseSet<unsigned> SeenNumbers; - for (auto &MBB : *MF) { - for (auto &MI : MBB) { + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { if (auto Num = MI.peekDebugInstrNum()) { auto Result = SeenNumbers.insert((unsigned)Num); if (!Result.second) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp index ec383b9b1c65..51de99b81057 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp @@ -1395,7 +1395,7 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, // Finally, add the set of defs to each block in the iterated dominance // frontier. 
- for (auto DB : IDF) { + for (auto *DB : IDF) { NodeAddr<BlockNode*> DBA = findBlock(DB); PhiM[DBA.Id].insert(Defs.begin(), Defs.end()); } @@ -1657,7 +1657,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) { // Recursively process all children in the dominator tree. MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode()); - for (auto I : *N) { + for (auto *I : *N) { MachineBasicBlock *SB = I->getBlock(); NodeAddr<BlockNode*> SBA = findBlock(SB); linkBlockRefs(DefM, SBA); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp index 2fd947086b4d..d8eac20d16b6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp @@ -61,7 +61,7 @@ namespace rdf { raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) { OS << '{'; - for (auto &I : P.Obj) { + for (const auto &I : P.Obj) { OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{'; for (auto J = I.second.begin(), E = I.second.end(); J != E; ) { OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second); @@ -767,7 +767,7 @@ void Liveness::computeLiveIns() { } for (auto I : IDF) - for (auto S : I.second) + for (auto *S : I.second) IIDF[S].insert(I.first); computePhiInfo(); @@ -926,7 +926,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs()); CopyLiveIns(B, LiveIn); - for (auto SI : B->successors()) + for (auto *SI : B->successors()) CopyLiveIns(SI, Live); for (MachineInstr &MI : llvm::reverse(*B)) { @@ -1003,7 +1003,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { // Go up the dominator tree (depth-first). 
MachineDomTreeNode *N = MDT.getNode(B); - for (auto I : *N) { + for (auto *I : *N) { RefMap L; MachineBasicBlock *SB = I->getBlock(); traverse(SB, L); @@ -1015,7 +1015,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { if (Trace) { dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__ << " after recursion into: {"; - for (auto I : *N) + for (auto *I : *N) dbgs() << ' ' << I->getBlock()->getNumber(); dbgs() << " }\n"; dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n'; @@ -1155,7 +1155,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n'; } - for (auto C : IIDF[B]) { + for (auto *C : IIDF[B]) { RegisterAggr &LiveC = LiveMap[C]; for (const std::pair<const RegisterId, NodeRefSet> &S : LiveIn) for (auto R : S.second) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 69db8bad54f9..d9ced9191fae 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -635,7 +635,7 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited, SmallPtrSet<MachineInstr*, 4> Uses; getGlobalUses(MI, MO.getReg(), Uses); - for (auto I : Uses) { + for (auto *I : Uses) { if (Ignore.count(I) || ToRemove.count(I)) continue; if (!isSafeToRemove(I, Visited, ToRemove, Ignore)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp index 0c18814189eb..990dd84c829d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp @@ -166,7 +166,7 @@ void RegAllocBase::allocatePhysRegs() { void RegAllocBase::postOptimization() { spiller().postOptimization(); - for (auto DeadInst : DeadRemats) { + for (auto *DeadInst : DeadRemats) { 
LIS->RemoveMachineInstrFromMaps(*DeadInst); DeadInst->eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp index 7defdf04aec8..91795f3d27fe 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -135,6 +135,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) INITIALIZE_PASS_DEPENDENCY(MachineScheduler) INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 72ceaa768803..9e4e26f1392e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1478,7 +1478,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - for (auto &LiveReg : MBB.liveouts()) + for (const auto &LiveReg : MBB.liveouts()) setPhysRegState(LiveReg.PhysReg, regPreAssigned); Coalesced.clear(); @@ -1580,8 +1580,7 @@ FunctionPass *llvm::createFastRegisterAllocator() { return new RegAllocFast(); } -FunctionPass *llvm::createFastRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor, bool ClearVirtRegs) { +FunctionPass *llvm::createFastRegisterAllocator(RegClassFilterFunc Ftor, + bool ClearVirtRegs) { return new RegAllocFast(Ftor, ClearVirtRegs); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 2efb98ae200d..4a54d7ebf8a9 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -180,16 +180,7 @@ FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -namespace llvm { -FunctionPass* createGreedyRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor); - -} - -FunctionPass* llvm::createGreedyRegisterAllocator( - std::function<bool(const TargetRegisterInfo &TRI, - const TargetRegisterClass &RC)> Ftor) { +FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) { return new RAGreedy(Ftor); } @@ -202,8 +193,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineBlockFrequencyInfo>(); AU.addPreserved<MachineBlockFrequencyInfo>(); - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); AU.addRequired<SlotIndexes>(); @@ -2530,7 +2519,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); - AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); initializeCSRCost(); @@ -2552,7 +2540,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h index 358e74541a54..316b12d0213b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -54,7 +53,6 @@ class MachineLoop; class MachineLoopInfo; class MachineOptimizationRemarkEmitter; class MachineOptimizationRemarkMissed; -class SlotIndex; class SlotIndexes; class TargetInstrInfo; class VirtRegMap; @@ -174,7 +172,6 @@ private: EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; - AliasAnalysis *AA; // state std::unique_ptr<Spiller> SpillerInstance; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp index 8c262130fb70..b3d926eeb552 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -783,7 +783,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { VRegSpiller.postOptimization(); /// Remove dead defs because of rematerialization. 
- for (auto DeadInst : DeadRemats) { + for (auto *DeadInst : DeadRemats) { LIS.RemoveMachineInstrFromMaps(*DeadInst); DeadInst->eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp index 32fa5e07dd16..17e3eeef664b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp @@ -74,8 +74,7 @@ double RegAllocScore::getScore() const { RegAllocScore llvm::calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults) { + const MachineBlockFrequencyInfo &MBFI) { return calculateRegAllocScore( MF, [&](const MachineBasicBlock &MBB) { @@ -83,7 +82,7 @@ llvm::calculateRegAllocScore(const MachineFunction &MF, }, [&](const MachineInstr &MI) { return MF.getSubtarget().getInstrInfo()->isTriviallyReMaterializable( - MI, &AAResults); + MI); }); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h index 2bcd0b5895bf..b80adae29f23 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h @@ -19,7 +19,6 @@ namespace llvm { -class AAResults; class MachineBasicBlock; class MachineBlockFrequencyInfo; class MachineFunction; @@ -62,8 +61,7 @@ public: /// different policies, the better policy would have a smaller score. /// The implementation is the overload below (which is also easily unittestable) RegAllocScore calculateRegAllocScore(const MachineFunction &MF, - const MachineBlockFrequencyInfo &MBFI, - AAResults &AAResults); + const MachineBlockFrequencyInfo &MBFI); /// Implementation of the above, which is also more easily unittestable. 
RegAllocScore calculateRegAllocScore( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index 930d05324440..8a6f823c8a0c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1148,7 +1148,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // we need to keep the copy of B = A at the end of Pred if we remove // B = A from MBB. bool ValB_Changed = false; - for (auto VNI : IntB.valnos) { + for (auto *VNI : IntB.valnos) { if (VNI->isUnused()) continue; if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) { @@ -1306,7 +1306,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } if (!TII->isAsCheapAsAMove(*DefMI)) return false; - if (!TII->isTriviallyReMaterializable(*DefMI, AA)) + if (!TII->isTriviallyReMaterializable(*DefMI)) return false; if (!definesFullReg(*DefMI, SrcReg)) return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp index 62a459fca611..b14a36e4eeb4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp @@ -581,7 +581,7 @@ void RegisterOperands::collect(const MachineInstr &MI, void RegisterOperands::detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS) { SlotIndex SlotIdx = LIS.getInstructionIndex(MI); - for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) { + for (auto *RI = Defs.begin(); RI != Defs.end(); /*empty*/) { Register Reg = RI->RegUnit; const LiveRange *LR = getLiveRange(LIS, Reg); if (LR != nullptr) { @@ -602,7 +602,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI) { - for (auto I = Defs.begin(); I != Defs.end(); ) { + for (auto *I 
= Defs.begin(); I != Defs.end();) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot()); // If the def is all that is live after the instruction, then in case @@ -620,7 +620,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, ++I; } } - for (auto I = Uses.begin(); I != Uses.end(); ) { + for (auto *I = Uses.begin(); I != Uses.end();) { LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getBaseIndex()); LaneBitmask LaneMask = I->LaneMask & LiveBefore; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index e7116ec3ea28..00a551ade213 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -340,7 +340,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { // analysis here, which would look at all uses of an argument inside // the function being called. auto B = CS.arg_begin(), E = CS.arg_end(); - for (auto A = B; A != E; ++A) + for (const auto *A = B; A != E; ++A) if (A->get() == V) if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || CS.doesNotAccessMemory()))) { @@ -498,7 +498,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (ClColoring) SSC.run(); - for (auto *I : SSC.getMarkers()) { + for (const auto *I : SSC.getMarkers()) { auto *Op = dyn_cast<Instruction>(I->getOperand(1)); const_cast<IntrinsicInst *>(I)->eraseFromParent(); // Remove the operand bitcast, too, if it has no more uses left. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 07dcc34fbf15..4fc9399c2b9e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -530,9 +530,9 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// Returns true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). -static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) { +static inline bool isGlobalMemoryObject(MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA)); + (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad()); } void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, @@ -880,7 +880,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, // actual addresses). // This is a barrier event that acts as a pivotal node in the DAG. - if (isGlobalMemoryObject(AA, &MI)) { + if (isGlobalMemoryObject(&MI)) { // Become the barrier chain. if (BarrierChain) @@ -917,7 +917,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, // If it's not a store or a variant load, we're done. if (!MI.mayStore() && - !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))) + !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad())) continue; // Always add dependecy edge to BarrierChain if present. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp index d627519a34aa..011f55efce1d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -433,7 +433,7 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { DebugPseudoINS.push_back(&*DIt); DIt++; } - for (auto DI : DebugPseudoINS) { + for (auto *DI : DebugPseudoINS) { DI->moveBefore(&*EndBlock->getFirstInsertionPt()); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2654c00929d8..edb0756e8c3b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1868,8 +1868,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try to eliminate it if the commuted // version is already present in the DAG. - if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && - N->getNumValues() == 1) { + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4159,6 +4158,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -5978,44 +5981,64 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { if (!TLI.isTypeLegal(VT)) return SDValue(); - // Look through an optional extension and find a 'not'. - // TODO: Should we favor test+set even without the 'not' op? 
- SDValue Not = And->getOperand(0), And1 = And->getOperand(1); - if (Not.getOpcode() == ISD::ANY_EXTEND) - Not = Not.getOperand(0); - if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + // Look through an optional extension. + SDValue And0 = And->getOperand(0), And1 = And->getOperand(1); + if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse()) + And0 = And0.getOperand(0); + if (!isOneConstant(And1) || !And0.hasOneUse()) return SDValue(); - // Look though an optional truncation. The source operand may not be the same - // type as the original 'and', but that is ok because we are masking off - // everything but the low bit. - SDValue Srl = Not.getOperand(0); - if (Srl.getOpcode() == ISD::TRUNCATE) - Srl = Srl.getOperand(0); + SDValue Src = And0; + + // Attempt to find a 'not' op. + // TODO: Should we favor test+set even without the 'not' op? + bool FoundNot = false; + if (isBitwiseNot(Src)) { + FoundNot = true; + Src = Src.getOperand(0); + + // Look though an optional truncation. The source operand may not be the + // same type as the original 'and', but that is ok because we are masking + // off everything but the low bit. + if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse()) + Src = Src.getOperand(0); + } // Match a shift-right by constant. - if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || - !isa<ConstantSDNode>(Srl.getOperand(1))) + if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse()) return SDValue(); // We might have looked through casts that make this transform invalid. // TODO: If the source type is wider than the result type, do the mask and // compare in the source type. 
- const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); - unsigned VTBitWidth = VT.getSizeInBits(); - if (ShiftAmt.uge(VTBitWidth)) + unsigned VTBitWidth = VT.getScalarSizeInBits(); + SDValue ShiftAmt = Src.getOperand(1); + auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt); + if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth)) return SDValue(); - if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1))) + // Set source to shift source. + Src = Src.getOperand(0); + + // Try again to find a 'not' op. + // TODO: Should we favor test+set even with two 'not' ops? + if (!FoundNot) { + if (!isBitwiseNot(Src)) + return SDValue(); + Src = Src.getOperand(0); + } + + if (!TLI.hasBitTest(Src, ShiftAmt)) return SDValue(); // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0 SDLoc DL(And); - SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + SDValue X = DAG.getZExtOrTrunc(Src, DL, VT); EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Mask = DAG.getConstant( - APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); @@ -6229,7 +6252,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. 
APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat( + N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; @@ -6339,18 +6363,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { - if (SDValue Res = reduceLoadWidth(N)) { - LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND - ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(N); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); - return SDValue(N, 0); - } - } + if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector()) + if (SDValue Res = reduceLoadWidth(N)) + return Res; if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're @@ -6856,20 +6871,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { } /// OR combines for which the commuted variant will be tried as well. -static SDValue visitORCommutative( - SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { +static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, + SDNode *N) { EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. 
- if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0), + if (getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) - if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1), + if (getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) @@ -7915,7 +7933,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; - for (auto Store : Stores) { + for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. SDValue Trunc = Store->getValue(); @@ -8488,28 +8506,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotX, N1); } - if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { - ConstantSDNode *XorC = isConstOrConstSplat(N1); - ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); - unsigned BitWidth = VT.getScalarSizeInBits(); - if (XorC && ShiftC) { - // Don't crash on an oversized shift. We can not guarantee that a bogus - // shift has been simplified to undef. - uint64_t ShiftAmt = ShiftC->getLimitedValue(); - if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnes(BitWidth); - Ones = N0Opcode == ISD::SHL ? 
Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); - if (XorC->getAPIntValue() == Ones) { - // If the xor constant is a shifted -1, do a 'not' before the shift: - // xor (X << ShiftC), XorC --> (not X) << ShiftC - // xor (X >> ShiftC), XorC --> (not X) >> ShiftC - SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); - return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); - } - } - } - } - // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { SDValue A = N0Opcode == ISD::ADD ? N0 : N1; @@ -11817,6 +11813,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { EVT N00VT = N00.getValueType(); SDLoc DL(N); + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // On some architectures (such as SSE/NEON/etc) the SETCC result type is // the same size as the compared operands. Try to optimize sext(setcc()) // if this is the case. @@ -12358,6 +12357,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return V; if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { @@ -12549,6 +12551,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. 
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // For vectors: // aext(setcc) -> vsetcc // aext(setcc) -> truncate(vsetcc) @@ -13155,6 +13160,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Try to narrow a truncate-of-sext_in_reg to the destination type: + // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM + if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + N0.hasOneUse()) { + SDValue X = N0.getOperand(0); + SDValue ExtVal = N0.getOperand(1); + EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT(); + if (ExtVT.bitsLT(VT)) { + SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); + } + } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); @@ -19478,7 +19496,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return Shuf; // Handle <1 x ???> vector insertion special cases. - if (VT.getVectorNumElements() == 1) { + if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && InVal.getOperand(0).getValueType() == VT && @@ -19506,80 +19524,77 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } - // Attempt to fold the insertion into a legal BUILD_VECTOR. + // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { - assert(Ops.size() == NumElts && "Unexpected vector size"); - - // Insert the element - if (Elt < Ops.size()) { - // All the operands of BUILD_VECTOR must have the same type; - // we enforce that here. - EVT OpVT = Ops[0].getValueType(); - Ops[Elt] = - OpVT.isInteger() ? 
DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + // vXi1 vector - we don't need to recurse. + if (NumElts == 1) + return DAG.getBuildVector(VT, DL, {InVal}); + + // If we haven't already collected the element, insert into the op list. + EVT MaxEltVT = InVal.getValueType(); + auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt, + unsigned Idx) { + if (!Ops[Idx]) { + Ops[Idx] = Elt; + if (VT.isInteger()) { + EVT EltVT = Elt.getValueType(); + MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT; + } } + }; - // Return the new vector + // Ensure all the operands are the same value type, fill any missing + // operands with UNDEF and create the BUILD_VECTOR. + auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { + assert(Ops.size() == NumElts && "Unexpected vector size"); + for (SDValue &Op : Ops) { + if (Op) + Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op; + else + Op = DAG.getUNDEF(MaxEltVT); + } return DAG.getBuildVector(VT, DL, Ops); }; - // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially - // be converted to a BUILD_VECTOR). Fill in the Ops vector with the - // vector elements. - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 8> Ops(NumElts, SDValue()); + Ops[Elt] = InVal; - // Do not combine these two vectors if the output vector will not replace - // the input vector. - if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { - Ops.append(InVec->op_begin(), InVec->op_end()); - return UpdateBuildVector(Ops); - } + // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR. + for (SDValue CurVec = InVec; CurVec;) { + // UNDEF - build new BUILD_VECTOR from already inserted operands. 
+ if (CurVec.isUndef()) + return CanonicalizeBuildVector(Ops); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) { - Ops.push_back(InVec.getOperand(0)); - Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType())); - return UpdateBuildVector(Ops); - } + // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) { + for (unsigned I = 0; I != NumElts; ++I) + AddBuildVectorOp(Ops, CurVec.getOperand(I), I); + return CanonicalizeBuildVector(Ops); + } - if (InVec.isUndef()) { - Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); - return UpdateBuildVector(Ops); - } + // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) { + AddBuildVectorOp(Ops, CurVec.getOperand(0), 0); + return CanonicalizeBuildVector(Ops); + } - // If we're inserting into the end of a vector as part of an sequence, see - // if we can create a BUILD_VECTOR by following the sequence back up the - // chain. - if (Elt == (NumElts - 1)) { - SmallVector<SDValue> ReverseInsertions; - ReverseInsertions.push_back(InVal); - - EVT MaxEltVT = InVal.getValueType(); - SDValue CurVec = InVec; - for (unsigned I = 1; I != NumElts; ++I) { - if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse()) - break; + // INSERT_VECTOR_ELT - insert operand and continue up the chain. 
+ if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse()) + if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2))) + if (CurIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CurIdx->getZExtValue(); + AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx); - auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)); - if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I)) - break; - SDValue CurVal = CurVec.getOperand(1); - ReverseInsertions.push_back(CurVal); - if (VT.isInteger()) { - EVT CurValVT = CurVal.getValueType(); - MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT; - } - CurVec = CurVec.getOperand(0); - } + // Found entire BUILD_VECTOR. + if (all_of(Ops, [](SDValue Op) { return !!Op; })) + return CanonicalizeBuildVector(Ops); - if (ReverseInsertions.size() == NumElts) { - for (unsigned I = 0; I != NumElts; ++I) { - SDValue Val = ReverseInsertions[(NumElts - 1) - I]; - Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val; - Ops.push_back(Val); - } - return DAG.getBuildVector(VT, DL, Ops); - } + CurVec = CurVec->getOperand(0); + continue; + } + + // Failed to find a match in the chain - bail. + break; } } @@ -22643,6 +22658,56 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // If we're not performing a select/blend shuffle, see if we can convert the + // shuffle into a AND node, with all the out-of-lane elements are known zero. + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + bool IsInLaneMask = true; + ArrayRef<int> Mask = SVN->getMask(); + SmallVector<int, 16> ClearMask(NumElts, -1); + APInt DemandedLHS = APInt::getNullValue(NumElts); + APInt DemandedRHS = APInt::getNullValue(NumElts); + for (int I = 0; I != (int)NumElts; ++I) { + int M = Mask[I]; + if (M < 0) + continue; + ClearMask[I] = M == I ? I : (I + NumElts); + IsInLaneMask &= (M == I) || (M == (int)(I + NumElts)); + if (M != I) { + APInt &Demanded = M < (int)NumElts ? 
DemandedLHS : DemandedRHS; + Demanded.setBit(M % NumElts); + } + } + // TODO: Should we try to mask with N1 as well? + if (!IsInLaneMask && + (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) && + (DemandedLHS.isNullValue() || + DAG.MaskedVectorIsZero(N0, DemandedLHS)) && + (DemandedRHS.isNullValue() || + DAG.MaskedVectorIsZero(N1, DemandedRHS))) { + SDLoc DL(N); + EVT IntVT = VT.changeVectorElementTypeToInteger(); + EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); + SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); + SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); + SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); + for (int I = 0; I != (int)NumElts; ++I) + if (0 <= Mask[I]) + AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; + + // See if a clear mask is legal instead of going via + // XformToShuffleWithZero which loses UNDEF mask elements. + if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) + return DAG.getBitcast( + VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), + DAG.getConstant(0, DL, IntVT), ClearMask)); + + if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) + return DAG.getBitcast( + VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), + DAG.getBuildVector(IntVT, DL, AndMask))); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -23385,10 +23450,14 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, int Index0, Index1; SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); + // Extract element from splat_vector should be free. + // TODO: use DAG.isSplatValue instead? 
+ bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR && + N1.getOpcode() == ISD::SPLAT_VECTOR; if (!Src0 || !Src1 || Index0 != Index1 || Src0.getValueType().getVectorElementType() != EltVT || Src1.getValueType().getVectorElementType() != EltVT || - !TLI.isExtractVecEltCheap(VT, Index0) || + !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) || !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); @@ -23410,6 +23479,8 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); return DAG.getBuildVector(VT, DL, Ops); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8bdc9410d131..56d35dfe8701 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1404,17 +1404,21 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } SDValue NewLoad; + Align ElementAlignment = + std::min(cast<StoreSDNode>(Ch)->getAlign(), + DAG.getDataLayout().getPrefTypeAlign( + Op.getValueType().getTypeForEVT(*DAG.getContext()))); if (Op.getValueType().isVector()) { StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, Op.getValueType(), Idx); - NewLoad = - DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), ElementAlignment); } else { StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - VecVT.getVectorElementType()); + MachinePointerInfo(), VecVT.getVectorElementType(), + ElementAlignment); } 
// Replace the chain going out of the store, by the one out of the load. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6c136bdfc652..b2df67f45c72 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2918,6 +2918,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = SoftPromoteHalfOp_PATCHPOINT(N, OpNo); + break; } if (!Res.getNode()) @@ -3059,3 +3062,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(); // Signal that we replaced the node ourselves. } + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_PATCHPOINT(SDNode *N, + unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Op = N->getOperand(OpNo); + NewOps[OpNo] = GetSoftPromotedHalf(Op); + SDValue NewNode = + DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we replaced the node ourselves. 
+} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 343722a97c3c..228d4a43ccde 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1727,6 +1727,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = PromoteIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = PromoteIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = PromoteIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -2341,6 +2348,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Operand = N->getOperand(OpNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); + NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion 
//===----------------------------------------------------------------------===// @@ -2886,11 +2912,15 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); } else { Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); } return; } @@ -4693,6 +4723,13 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = ExpandIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = ExpandIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = ExpandIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -5108,6 +5145,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { return Swap.getValue(1); } +SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SDValue Hi; // The upper half is dropped out. 
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + GetExpandedInteger(NewOps[OpNo], NewOps[OpNo], Hi); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { SDLoc dl(N); @@ -5253,21 +5301,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { assert(NOutVT.isVector() && "This type must be promoted to a vector type"); unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - + TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT); + unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType); SDLoc dl(N); SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op; + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); // BUILD_VECTOR integer operand types are allowed to be larger than the // result's element type. This may still be true after the promotion. For // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. - if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); - else - Op = N->getOperand(i); + if (OpVT.bitsLT(NOutVTElem)) { + unsigned ExtOpc = ISD::ANY_EXTEND; + // Attempt to extend constant bool vectors to match target's BooleanContent. + // While not necessary, this improves chances of the constant correctly + // folding with compare results (e.g. for NOT patterns). 
+ if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant) + ExtOpc = NOutExtOpc; + Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op); + } Ops.push_back(Op); } @@ -5524,30 +5579,67 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); - SDValue Op = N->getOperand(OpNo); - SDLoc DL = SDLoc(N); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); + + // Copy operands before the one being expanded. SmallVector<SDValue> NewOps; + for (unsigned I = 0; I < OpNo; I++) + NewOps.push_back(N->getOperand(I)); + + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); + } else { + // FIXME: https://github.com/llvm/llvm-project/issues/55609 + return SDValue(); + } + + // Copy remaining operands. + for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++) + NewOps.push_back(N->getOperand(I)); + + SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we have replaced the node already. 
+} + +SDValue DAGTypeLegalizer::ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SDValue Op = N->getOperand(OpNo); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); // Copy operands before the one being expanded. + SmallVector<SDValue> NewOps; for (unsigned I = 0; I < OpNo; I++) NewOps.push_back(N->getOperand(I)); - if (Op->getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast<ConstantSDNode>(Op); - EVT Ty = Op.getValueType(); - if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { - NewOps.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); - } else { - // FIXME: https://github.com/llvm/llvm-project/issues/55609 - return SDValue(); - } + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); } else { - // FIXME: Non-constant operands are not yet handled: - // - https://github.com/llvm/llvm-project/issues/26431 - // - https://github.com/llvm/llvm-project/issues/55957 + // FIXME: https://github.com/llvm/llvm-project/issues/55609 return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2807b7f5ae68..6696b79cf885 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -403,6 +403,8 @@ private: SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); SDValue 
PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -495,6 +497,8 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); @@ -744,6 +748,7 @@ private: SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 842ffa2aa23e..f5a1eae1e7fe 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -737,6 +737,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; + case ISD::SELECT_CC: { + if (Node->getValueType(0).isScalableVector()) { + EVT CondVT = TLI.getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); + SDValue SetCC = + DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0), + Node->getOperand(1), Node->getOperand(4)); + Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC, + Node->getOperand(2), + 
Node->getOperand(3))); + return; + } + break; + } case ISD::FP_TO_UINT: ExpandFP_TO_UINT(Node, Results); return; @@ -833,6 +847,16 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + // Expand the fpsosisat if it is scalable to prevent it from unrolling below. + if (Node->getValueType(0).isScalableVector()) { + if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) { + Results.push_back(Expanded); + return; + } + } + break; case ISD::SMULFIX: case ISD::UMULFIX: if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 78fc407e9573..3ac2a7bddc5a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -793,7 +793,7 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Emit any debug values associated with the node. if (N->getHasDebugValue()) { MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (!DV->isEmitted()) if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) BB->insert(InsertPos, DbgMI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a10157b404e..5166db033c62 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -749,7 +749,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // source order number as N. 
MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (DV->isEmitted()) continue; unsigned DVOrder = DV->getOrder(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8d0f5faf647..441437351852 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -602,7 +603,7 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDValue> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -611,7 +612,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDUse> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -2711,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, SubDemandedElts &= ScaledDemandedElts; if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) return false; - - // Here we can't do "MatchAnyBits" operation merge for undef bits. - // Because some operation only use part value of the source. 
- // Take llvm.fshl.* for example: - // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32 - // t2: v2i64 = bitcast t1 - // t5: v2i64 = fshl t3, t4, t2 - // We can not convert t2 to {i64 undef, i64 undef} - UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, - /*MatchAllBits=*/true); + // TODO: Add support for merging sub undef elements. + if (!SubUndefElts.isZero()) + return false; } return true; } @@ -2947,6 +2941,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned Opcode = Op.getOpcode(); switch (Opcode) { + case ISD::MERGE_VALUES: + return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3219,12 +3216,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } - case ISD::UDIV: { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::udiv(Known, Known2); - break; - } case ISD::AVGCEILU: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3339,6 +3330,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero |= Known2.Zero; } break; + case ISD::SHL_PARTS: + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: { + assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); + + // Collect lo/hi source values and concatenate. + // TODO: Would a KnownBits::concatBits helper be useful? 
+ unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = Known.anyext(LoBits + HiBits); + Known.insertBits(Known2, LoBits); + + // Collect shift amount. + Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + + if (Opcode == ISD::SHL_PARTS) + Known = KnownBits::shl(Known, Known2); + else if (Opcode == ISD::SRA_PARTS) + Known = KnownBits::ashr(Known, Known2); + else // if (Opcode == ISD::SRL_PARTS) + Known = KnownBits::lshr(Known, Known2); + + // TODO: Minimum shift low/high bits are known zero. + + if (Op.getResNo() == 0) + Known = Known.extractBits(LoBits, 0); + else + Known = Known.extractBits(HiBits, LoBits); + break; + } case ISD::SIGN_EXTEND_INREG: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -3570,6 +3593,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } + case ISD::UDIV: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::udiv(Known, Known2); + break; + } case ISD::SREM: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3925,7 +3954,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertZext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - + case ISD::MERGE_VALUES: + return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && 
(Tmp > 1); ++i) { @@ -6105,8 +6136,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getValueType().isVector() == VT.isVector() && "FP_TO_*INT_SAT type should be vector iff the operand type is " "vector!"); - assert((!VT.isVector() || VT.getVectorNumElements() == - N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "Vector element counts must match in FP_TO_*INT_SAT"); assert(!cast<VTSDNode>(N2)->getVT().isVector() && "Type to saturate to must be a scalar."); @@ -6719,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6782,6 +6813,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, AAMDNodes NewAAInfo = AAInfo; NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>(); + bool isConstant = + AA && SrcVal && + AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); + MachineMemOperand::Flags MMOFlags = isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 16> OutLoadChains; @@ -6843,6 +6879,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; + if (isConstant) + SrcMMOFlags |= MachineMemOperand::MOInvariant; Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, @@ -7131,7 +7169,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -7142,7 +7180,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); if (Result.getNode()) return Result; } @@ -7161,9 +7199,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // use a (potentially long) sequence of loads and stores. 
if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); - return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo); + return getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -7245,7 +7283,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -8904,7 +8942,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif @@ -8928,6 +8966,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "True and False arms of SelectCC must have same type!"); assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); + assert((!Ops[0].getValueType().isVector() || + Ops[0].getValueType().getVectorElementCount() == + VT.getVectorElementCount()) && + "Expected select_cc with vector result to have the same sized " + "comparison type!"); break; case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); @@ -9018,12 +9061,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != 
ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif switch (Opcode) { + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: { + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid add/sub overflow op!"); + assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && + Ops[0].getValueType() == Ops[1].getValueType() && + Ops[0].getValueType() == VTList.VTs[0] && + "Binary operator types must match!"); + SDValue N1 = Ops[0], N2 = Ops[1]; + canonicalizeCommutativeBinop(Opcode, N1, N2); + + // (X +- 0) -> X with zero-overflow. + ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false, + /*AllowTruncation*/ true); + if (N2CV && N2CV->isZero()) { + SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]); + return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); + } + break; + } case ISD::STRICT_FP_EXTEND: assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid STRICT_FP_EXTEND!"); @@ -9914,7 +9979,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { return; SmallVector<SDDbgValue *, 2> ClonedDVs; - for (auto DV : GetDbgValues(&N)) { + for (auto *DV : GetDbgValues(&N)) { if (DV->isInvalidated()) continue; switch (N.getOpcode()) { @@ -10268,7 +10333,7 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA)) return true; - for (auto &Op : N->ops()) { + for (const auto &Op : N->ops()) { if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent()) return true; } @@ -10298,7 +10363,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } for (size_t I = 0; I != Order.size(); ++I) { SDNode *N = Order[I]; - for (auto U : N->uses()) { + for (auto *U : N->uses()) { unsigned &UnsortedOps = Degree[U]; if (0 == --UnsortedOps) Order.push_back(U); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 
fe3c38ec590d..35650b9bd00e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1789,7 +1789,7 @@ static void findWasmUnwindDestinations( UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. We don't // continue to the unwind destination of the catchswitch for wasm. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { @@ -1844,7 +1844,7 @@ static void findUnwindDestinations( UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); @@ -2990,14 +2990,20 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { CopyToExportRegsIfNeeded(&I); // Retrieve successors. + SmallPtrSet<BasicBlock *, 8> Dests; + Dests.insert(I.getDefaultDest()); MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; - addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + BasicBlock *Dest = I.getIndirectDest(i); + MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); + Target->setHasAddressTaken(); + // Don't add duplicate machine successors. 
+ if (Dests.insert(Dest).second) + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); } CallBrMBB->normalizeSuccProbs(); @@ -4075,6 +4081,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { return; bool isVolatile = I.isVolatile(); + MachineMemOperand::Flags MMOFlags = + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); SDValue Root; bool ConstantMemory = false; @@ -4091,6 +4099,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; + MMOFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4110,9 +4124,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); - MachineMemOperand::Flags MMOFlags - = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -5766,7 +5777,7 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { ->getCalledFunction() ->getIntrinsicID() == Intrinsic::call_preallocated_setup && "expected call_preallocated_setup Value"); - for (auto *U : PreallocatedSetup->users()) { + for (const auto *U : PreallocatedSetup->users()) { auto *UseCall = cast<CallBase>(U); const Function *Fn = UseCall->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { @@ -5859,11 +5870,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing 
different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - /* AlwaysInline */ false, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5881,11 +5891,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, - /* AlwaysInline */ true, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5940,7 +5949,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } @@ -8855,7 +8864,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } break; - case InlineAsm::isInput: { + case InlineAsm::isInput: + case InlineAsm::isLabel: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { @@ -9295,19 +9305,18 @@ void SelectionDAGBuilder::populateCallLoweringInfo( static void 
addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) { - SDValue OpVal = Builder.getValue(Call.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); - } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); - } else - Ops.push_back(OpVal); + SelectionDAG &DAG = Builder.DAG; + for (unsigned I = StartIdx; I < Call.arg_size(); I++) { + SDValue Op = Builder.getValue(Call.getArgOperand(I)); + + // Things on the stack are pointer-typed, meaning that they are already + // legal and can be emitted directly to target nodes. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType())); + } else { + // Otherwise emit a target independent node to be legalised. + Ops.push_back(Builder.getValue(Call.getArgOperand(I))); + } } } @@ -9359,20 +9368,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Ops.push_back(ShadConst); // Add the live variables. - for (unsigned I = 2; I < CI.arg_size(); I++) { - SDValue Op = getValue(CI.getArgOperand(I)); - - // Things on the stack are pointer-typed, meaning that they are already - // legal and can be emitted directly to target nodes. 
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - Ops.push_back(DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout()))); - } else { - // Otherwise emit a target independent node to be legalised. - Ops.push_back(getValue(CI.getArgOperand(I))); - } - } + addStackMapLiveVars(CI, 2, DL, Ops, *this); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -9449,6 +9445,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; + // Push the chain. + Ops.push_back(*(Call->op_begin())); + + // Optionally, push the glue (if any). + if (HasGlue) + Ops.push_back(*(Call->op_end() - 1)); + + // Push the register mask info. + if (HasGlue) + Ops.push_back(*(Call->op_end() - 2)); + else + Ops.push_back(*(Call->op_end() - 1)); + // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( @@ -9477,27 +9486,13 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CB.getArgOperand(i))); - // Push the arguments from the call instruction up to the register mask. + // Push the arguments from the call instruction. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); - // Push the register mask info. - if (HasGlue) - Ops.push_back(*(Call->op_end()-2)); - else - Ops.push_back(*(Call->op_end()-1)); - - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). 
- Ops.push_back(*(Call->op_begin())); - - // Push the glue flag (last operand). - if (HasGlue) - Ops.push_back(*(Call->op_end()-1)); - SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition @@ -9514,13 +9509,12 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - dl, NodeTys, Ops); + SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) - setValue(&CB, SDValue(MN, 0)); + setValue(&CB, SDValue(PPV.getNode(), 0)); else setValue(&CB, Result.first); } @@ -9531,10 +9525,10 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; - SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + SDValue To[] = {PPV.getValue(1), PPV.getValue(2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else - DAG.ReplaceAllUsesWith(Call, MN); + DAG.ReplaceAllUsesWith(Call, PPV.getNode()); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9df0b64c26c3..6ba01664e756 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -488,6 +488,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; case ISD::STACKMAP: return "stackmap"; + case ISD::PATCHPOINT: + return "patchpoint"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) 
\ diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7f453f081982..d46a0a23cca3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2193,8 +2193,27 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, + SDValue OpVal, SDLoc DL) { + SDNode *OpNode = OpVal.getNode(); + + // FrameIndex nodes should have been directly emitted to TargetFrameIndex + // nodes at DAG-construction time. + assert(OpNode->getOpcode() != ISD::FrameIndex); + + if (OpNode->getOpcode() == ISD::Constant) { + Ops.push_back( + CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + Ops.push_back( + CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(), + DL, OpVal.getValueType())); + } else { + Ops.push_back(OpVal); + } +} + void SelectionDAGISel::Select_STACKMAP(SDNode *N) { - std::vector<SDValue> Ops; + SmallVector<SDValue, 32> Ops; auto *It = N->op_begin(); SDLoc DL(N); @@ -2213,24 +2232,8 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { Ops.push_back(Shad); // Live variable operands. - for (; It != N->op_end(); It++) { - SDNode *OpNode = It->getNode(); - SDValue O; - - // FrameIndex nodes should have been directly emitted to TargetFrameIndex - // nodes at DAG-construction time. 
- assert(OpNode->getOpcode() != ISD::FrameIndex); - - if (OpNode->getOpcode() == ISD::Constant) { - Ops.push_back( - CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - O = CurDAG->getTargetConstant( - cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType()); - } else { - O = *It; - } - Ops.push_back(O); - } + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); Ops.push_back(Chain); Ops.push_back(InFlag); @@ -2239,6 +2242,57 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops); } +void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { + SmallVector<SDValue, 32> Ops; + auto *It = N->op_begin(); + SDLoc DL(N); + + // Cache arguments that will be moved to the end in the target node. + SDValue Chain = *It++; + Optional<SDValue> Glue; + if (It->getValueType() == MVT::Glue) + Glue = *It++; + SDValue RegMask = *It++; + + // <id> operand. + SDValue ID = *It++; + assert(ID.getValueType() == MVT::i64); + Ops.push_back(ID); + + // <numShadowBytes> operand. + SDValue Shad = *It++; + assert(Shad.getValueType() == MVT::i32); + Ops.push_back(Shad); + + // Add the callee. + Ops.push_back(*It++); + + // Add <numArgs>. + SDValue NumArgs = *It++; + assert(NumArgs.getValueType() == MVT::i32); + Ops.push_back(NumArgs); + + // Calling convention. + Ops.push_back(*It++); + + // Push the args for the call. + for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--) + Ops.push_back(*It++); + + // Now push the live variables. + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); + + // Finally, the regmask, chain and (if present) glue are moved to the end. + Ops.push_back(RegMask); + Ops.push_back(Chain); + if (Glue.has_value()) + Ops.push_back(Glue.value()); + + SDVTList NodeTys = N->getVTList(); + CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops); +} + /// GetVBR - decode a vbr encoding whose top bit is set. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2796,6 +2850,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::STACKMAP: Select_STACKMAP(NodeToMatch); return; + case ISD::PATCHPOINT: + Select_PATCHPOINT(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3061158eea30..c5c093ae228f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -169,8 +169,14 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - const auto &RelocationMap = - Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; + const Value *Statepoint = Relocate->getStatepoint(); + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return None; + + const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps + [cast<GCStatepointInst>(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) @@ -193,7 +199,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { Optional<int> MergedResult = None; - for (auto &IncomingValue : Phi->incoming_values()) { + for (const auto &IncomingValue : Phi->incoming_values()) { Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) @@ -569,9 +575,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // We cannot assing them to VRegs. 
SmallSet<SDValue, 8> LPadPointers; if (!UseRegistersForGCPointersInLandingPad) - if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { + if (const auto *StInvoke = + dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { LandingPadInst *LPI = StInvoke->getLandingPadInst(); - for (auto *Relocate : SI.GCRelocates) + for (const auto *Relocate : SI.GCRelocates) if (Relocate->getOperand(0) == LPI) { LPadPointers.insert(Builder.getValue(Relocate->getBasePtr())); LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr())); @@ -739,7 +746,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG - for (auto *Reloc : SI.GCRelocates) + for (const auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif @@ -1017,7 +1024,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( static std::pair<const GCResultInst*, const GCResultInst*> getGCResultLocality(const GCStatepointInst &S) { std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr); - for (auto *U : S.users()) { + for (const auto *U : S.users()) { auto *GRI = dyn_cast<GCResultInst>(U); if (!GRI) continue; @@ -1195,9 +1202,13 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. 
- const GCStatepointInst *SI = CI.getStatepoint(); + const Value *SI = CI.getStatepoint(); + assert((isa<GCStatepointInst>(SI) || isa<UndefValue>(SI)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(SI)) + return; - if (SI->getParent() == CI.getParent()) { + if (cast<GCStatepointInst>(SI)->getParent() == CI.getParent()) { setValue(&CI, getValue(SI)); return; } @@ -1215,12 +1226,18 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { } void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { + const Value *Statepoint = Relocate.getStatepoint(); #ifndef NDEBUG // Consistency check // We skip this check for relocates not in the same basic block as their // statepoint. It would be too expensive to preserve validation info through // different basic blocks. - if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return; + + if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); auto *Ty = Relocate.getType()->getScalarType(); @@ -1230,14 +1247,15 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = - FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; + FuncInfo.StatepointRelocationMaps[cast<GCStatepointInst>(Statepoint)]; auto SlotIt = RelocationMap.find(&Relocate); assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); const RecordType &Record = SlotIt->second; // If relocation was done via virtual register.. 
if (Record.type == RecordType::SDValueNode) { - assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + assert(cast<GCStatepointInst>(Statepoint)->getParent() == + Relocate.getParent() && "Nonlocal gc.relocate mapped via SDValue"); SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr)); assert(SDV.getNode() && "empty SDValue"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66389a57f780..cd4f0ae42bcd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1056,13 +1056,13 @@ bool TargetLowering::SimplifyDemandedBits( // TODO: We can probably do more work on calculating the known bits and // simplifying the operations for scalable vectors, but for now we just // bail out. - if (Op.getValueType().isScalableVector()) + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) return false; bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); - assert((!Op.getValueType().isVector() || - NumElts == Op.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) && "Unexpected vector size"); APInt DemandedBits = OriginalDemandedBits; @@ -1088,7 +1088,6 @@ bool TargetLowering::SimplifyDemandedBits( } // Other users may use these bits. - EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -1468,6 +1467,33 @@ bool TargetLowering::SimplifyDemandedBits( } } + // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2)) + // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks. 
+ if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND && + Op0->hasOneUse() && Op1->hasOneUse()) { + // Attempt to match all commutations - m_c_Or would've been useful! + for (int I = 0; I != 2; ++I) { + SDValue X = Op.getOperand(I).getOperand(0); + SDValue C1 = Op.getOperand(I).getOperand(1); + SDValue Alt = Op.getOperand(1 - I).getOperand(0); + SDValue C2 = Op.getOperand(1 - I).getOperand(1); + if (Alt.getOpcode() == ISD::OR) { + for (int J = 0; J != 2; ++J) { + if (X == Alt.getOperand(J)) { + SDValue Y = Alt.getOperand(1 - J); + if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT, + {C1, C2})) { + SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12); + SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY)); + } + } + } + } + } + } + Known |= Known2; break; } @@ -1500,7 +1526,7 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); - ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); + ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts); if (C) { // If one side is a constant, and all of the set bits in the constant are // also known set on the other side, turn this into an AND, as we know @@ -1521,6 +1547,32 @@ bool TargetLowering::SimplifyDemandedBits( SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); } + + unsigned Op0Opcode = Op0.getOpcode(); + if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) { + if (ConstantSDNode *ShiftC = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + // Don't crash on an oversized shift. We can not guarantee that a + // bogus shift has been simplified to undef. + if (ShiftC->getAPIntValue().ult(BitWidth)) { + uint64_t ShiftAmt = ShiftC->getZExtValue(); + APInt Ones = APInt::getAllOnes(BitWidth); + Ones = Op0Opcode == ISD::SHL ? 
Ones.shl(ShiftAmt) + : Ones.lshr(ShiftAmt); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) && + TLI.isDesirableToCommuteXorWithShift(Op.getNode())) { + // If the xor constant is a demanded mask, do a 'not' before the + // shift: + // xor (X << ShiftC), XorC --> (not X) << ShiftC + // xor (X >> ShiftC), XorC --> (not X) >> ShiftC + SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not, + Op0.getOperand(1))); + } + } + } + } } // If we can't turn this into a 'not', try to shrink the constant. @@ -1723,6 +1775,26 @@ bool TargetLowering::SimplifyDemandedBits( if ((ShAmt < DemandedBits.getActiveBits()) && ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + } else { + // This is a variable shift, so we can't shift the demand mask by a known + // amount. But if we are not demanding high bits, then we are not + // demanding those bits from the pre-shifted operand either. + if (unsigned CTLZ = DemandedBits.countLeadingZeros()) { + APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ)); + if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO, + Depth + 1)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op->setFlags(Flags); + } + return true; + } + Known.resetAll(); + } } // If we are only demanding sign bits then we can use the shift source @@ -3292,6 +3364,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( TLO, Depth + 1)) return true; + // If every element pair has a zero/undef then just fold to zero. 
+ // fold (and x, undef) -> 0 / (and x, 0) -> 0 + // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 + if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + // If either side has a zero element, then the result element is zero, even // if the other is an UNDEF. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros @@ -3301,7 +3379,6 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef &= ~KnownZero; // Attempt to avoid multi-use ops if we don't need anything from them. - // TODO - use KnownUndef to relax the demandedelts? if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; @@ -5204,6 +5281,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // ConstraintOperands list. unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number. for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { ConstraintOperands.emplace_back(std::move(CI)); @@ -5240,6 +5318,14 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isInput: OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; + case InlineAsm::isLabel: + OpInfo.CallOperandVal = + cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType()) + .getSimpleVT(); + ++LabelNo; + continue; case InlineAsm::isClobber: // Nothing to do. break; @@ -5852,22 +5938,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. 
const APInt& Divisor = C->getAPIntValue(); - UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.IsAdd != 0 && !Divisor[0]) { + if (magics.IsAdd && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. - magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + magics = + UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(!magics.IsAdd && "Should use cheap fixup now"); } - APInt Magic = magics.Magic; - unsigned SelNPQ; - if (magics.IsAdd == 0 || Divisor.isOne()) { + if (!magics.IsAdd || Divisor.isOne()) { assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); PostShift = magics.ShiftAmount; @@ -5878,7 +5964,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, } PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); - MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? 
APInt::getOneBitSet(EltBits, EltBits - 1) : APInt::getZero(EltBits), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp index ffac68a223bf..ee3a0164564e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SlotIndexes.cpp @@ -179,21 +179,12 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End) { - // FIXME: Is this really necessary? The only caller repairIntervalsForRange() - // does the same thing. - // Find anchor points, which are at the beginning/end of blocks or at - // instructions that already have indexes. - while (Begin != MBB->begin() && !hasIndex(*Begin)) - --Begin; - while (End != MBB->end() && !hasIndex(*End)) - ++End; - bool includeStart = (Begin == MBB->begin()); SlotIndex startIdx; if (includeStart) startIdx = getMBBStartIdx(MBB); else - startIdx = getInstructionIndex(*Begin); + startIdx = getInstructionIndex(*--Begin); SlotIndex endIdx; if (End == MBB->end()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index 140a91ae342b..94149f56e703 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -347,13 +347,11 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
-SplitEditor::SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, - LiveIntervals &LIS, VirtRegMap &VRM, +SplitEditor::SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI) - : SA(SA), AA(AA), LIS(LIS), VRM(VRM), - MRI(VRM.getMachineFunction().getRegInfo()), MDT(MDT), - TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), + : SA(SA), LIS(LIS), VRM(VRM), MRI(VRM.getMachineFunction().getRegInfo()), + MDT(MDT), TII(*VRM.getMachineFunction().getSubtarget().getInstrInfo()), TRI(*VRM.getMachineFunction().getSubtarget().getRegisterInfo()), MBFI(MBFI), VRAI(VRAI), RegAssign(Allocator) {} @@ -371,9 +369,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { LICalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(nullptr); + Edit->anyRematerializable(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1454,7 +1450,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, None, &AA); + Edit->eliminateDeadDefs(Dead, None); } void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h index 4400a797d38e..556b022b93fb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h @@ -257,7 +257,6 @@ public: /// class LLVM_LIBRARY_VISIBILITY SplitEditor { SplitAnalysis &SA; - AAResults &AA; LiveIntervals &LIS; VirtRegMap &VRM; MachineRegisterInfo &MRI; @@ -436,9 +435,9 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. 
- SplitEditor(SplitAnalysis &SA, AAResults &AA, LiveIntervals &LIS, - VirtRegMap &VRM, MachineDominatorTree &MDT, - MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo &VRAI); + SplitEditor(SplitAnalysis &SA, LiveIntervals &LIS, VirtRegMap &VRM, + MachineDominatorTree &MDT, MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo &VRAI); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp index 6757d6ca4f88..ccaff862fa3f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp @@ -365,7 +365,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { }); for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { - for (auto II = std::next(I); II != E; ++II) { + for (auto *II = std::next(I); II != E; ++II) { if (I->DwarfRegNum != II->DwarfRegNum) { // Skip all the now invalid entries. I = --II; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp index 4408011c95c0..2282d53e8ffd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp @@ -267,7 +267,7 @@ void SwiftErrorValueTracking::preassignVRegs( if (auto *CB = dyn_cast<CallBase>(&*It)) { // A call-site with a swifterror argument is both use and def. const Value *SwiftErrorAddr = nullptr; - for (auto &Arg : CB->args()) { + for (const auto &Arg : CB->args()) { if (!Arg->isSwiftError()) continue; // Use of swifterror. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index ba533a491b9c..18507b8fa84f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -653,7 +653,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. // Disable tail duplication for this case for now, until the problem is // fixed. - for (auto SB : TailBB.successors()) { + for (auto *SB : TailBB.successors()) { for (auto &I : *SB) { if (!I.isPHI()) break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2a987ee3eedf..4116231c005f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -916,7 +916,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( } bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -952,7 +952,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( return false; // Avoid instructions which load from potentially varying memory. 
- if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) return false; // If any of the registers accessed are non-constant, conservatively assume diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index f7f4a4e3db6a..9b965109745c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1145,7 +1145,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, /// specified register class are all legal. bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const { - for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) + for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) if (isTypeLegal(*I)) return true; return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index c44fd9f97383..17fe819fa900 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1450,9 +1450,9 @@ void TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { - bool IsEarlyClobber = llvm::find_if(TiedPairs, [MI](auto const &TP) { - return MI->getOperand(TP.second).isEarlyClobber(); - }) != TiedPairs.end(); + bool IsEarlyClobber = llvm::any_of(TiedPairs, [MI](auto const &TP) { + return MI->getOperand(TP.second).isEarlyClobber(); + }); bool RemovedKillFlag = false; bool AllUsesCopied = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index 166a3c413f6a..8dc8d381ad16 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp 
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -446,7 +446,7 @@ void IRPromoter::ExtendSources() { // Now, insert extending instructions between the sources and their users. LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); - for (auto V : Sources) { + for (auto *V : Sources) { LLVM_DEBUG(dbgs() << " - " << *V << "\n"); if (auto *I = dyn_cast<Instruction>(V)) InsertZExt(I, I); @@ -524,7 +524,7 @@ void IRPromoter::TruncateSinks() { // Fix up any stores or returns that use the results of the promoted // chain. - for (auto I : Sinks) { + for (auto *I : Sinks) { LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); // Handle calls separately as we need to iterate over arg operands. @@ -570,7 +570,7 @@ void IRPromoter::Cleanup() { LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); // Some zexts will now have become redundant, along with their trunc // operands, so remove them - for (auto V : Visited) { + for (auto *V : Visited) { if (!isa<ZExtInst>(V)) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp index 8b5b585090f5..8225d4ea6996 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp @@ -579,7 +579,7 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { /// pressure, then return 0. 
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { PressureDiff &PD = DAG->getPressureDiff(SU); - for (auto &P : PD) { + for (const auto &P : PD) { if (!P.isValid()) continue; // The pressure differences are computed bottom-up, so the comparision for diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp index 298359dea9af..62b7f629f403 100644 --- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp +++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinker.cpp @@ -504,9 +504,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE( &DIE); return Flags; } + if (*LowPc > *HighPc) { + reportWarning("low_pc greater than high_pc. Range will be discarded.\n", + File, &DIE); + return Flags; + } // Replace the debug map range with a more accurate one. - Ranges[*LowPc] = ObjFileAddressRange(*HighPc, MyInfo.AddrAdjust); + Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust); Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust); return Flags; } @@ -1575,7 +1580,7 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(), OrigDwarf.getDWARFObj().getRangesSection(), OrigDwarf.isLittleEndian(), AddressSize); - auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + Optional<std::pair<AddressRange, int64_t>> CurrRange; DWARFUnit &OrigUnit = Unit.getOrigUnit(); auto OrigUnitDie = OrigUnit.getUnitDIE(false); uint64_t OrigLowPc = @@ -1598,12 +1603,11 @@ void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit, if (!Entries.empty()) { const DWARFDebugRangeList::RangeListEntry &First = Entries.front(); - if (CurrRange == InvalidRange || - First.StartAddress + OrigLowPc < CurrRange.start() || - First.StartAddress + OrigLowPc >= CurrRange.stop()) { - CurrRange = FunctionRanges.find(First.StartAddress + OrigLowPc); - if (CurrRange == InvalidRange || - CurrRange.start() > First.StartAddress + 
OrigLowPc) { + if (!CurrRange || + !CurrRange->first.contains(First.StartAddress + OrigLowPc)) { + CurrRange = FunctionRanges.getRangeValueThatContains( + First.StartAddress + OrigLowPc); + if (!CurrRange) { reportWarning("no mapping for range.", File); continue; } @@ -1710,7 +1714,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // in NewRows. std::vector<DWARFDebugLine::Row> Seq; const auto &FunctionRanges = Unit.getFunctionRanges(); - auto InvalidRange = FunctionRanges.end(), CurrRange = InvalidRange; + Optional<std::pair<AddressRange, int64_t>> CurrRange; // FIXME: This logic is meant to generate exactly the same output as // Darwin's classic dsymutil. There is a nicer way to implement this @@ -1729,19 +1733,14 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // it is marked as end_sequence in the input (because in that // case, the relocation offset is accurate and that entry won't // serve as the start of another function). - if (CurrRange == InvalidRange || Row.Address.Address < CurrRange.start() || - Row.Address.Address > CurrRange.stop() || - (Row.Address.Address == CurrRange.stop() && !Row.EndSequence)) { + if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) || + (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) { // We just stepped out of a known range. Insert a end_sequence // corresponding to the end of the range. - uint64_t StopAddress = CurrRange != InvalidRange - ? CurrRange.stop() + CurrRange.value() - : -1ULL; - CurrRange = FunctionRanges.find(Row.Address.Address); - bool CurrRangeValid = - CurrRange != InvalidRange && CurrRange.start() <= Row.Address.Address; - if (!CurrRangeValid) { - CurrRange = InvalidRange; + uint64_t StopAddress = + CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL; + CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address); + if (!CurrRange) { if (StopAddress != -1ULL) { // Try harder by looking in the Address ranges map. 
// There are corner cases where this finds a @@ -1749,14 +1748,9 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, // for now do as dsymutil. // FIXME: Understand exactly what cases this addresses and // potentially remove it along with the Ranges map. - auto Range = Ranges.lower_bound(Row.Address.Address); - if (Range != Ranges.begin() && Range != Ranges.end()) - --Range; - - if (Range != Ranges.end() && Range->first <= Row.Address.Address && - Range->second.HighPC >= Row.Address.Address) { - StopAddress = Row.Address.Address + Range->second.Offset; - } + if (Optional<std::pair<AddressRange, int64_t>> Range = + Ranges.getRangeValueThatContains(Row.Address.Address)) + StopAddress = Row.Address.Address + (*Range).second; } } if (StopAddress != -1ULL && !Seq.empty()) { @@ -1772,7 +1766,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, insertLineSequence(Seq, NewRows); } - if (!CurrRangeValid) + if (!CurrRange) continue; } @@ -1781,7 +1775,7 @@ void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit, continue; // Relocate row address and add it to the current sequence. - Row.Address.Address += CurrRange.value(); + Row.Address.Address += CurrRange->second; Seq.emplace_back(Row); if (Row.EndSequence) @@ -1921,11 +1915,9 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, // the function entry point, thus we can't just lookup the address // in the debug map. Use the AddressInfo's range map to see if the FDE // describes something that we can relocate. - auto Range = Ranges.upper_bound(Loc); - if (Range != Ranges.begin()) - --Range; - if (Range == Ranges.end() || Range->first > Loc || - Range->second.HighPC <= Loc) { + Optional<std::pair<AddressRange, int64_t>> Range = + Ranges.getRangeValueThatContains(Loc); + if (!Range) { // The +4 is to account for the size of the InitialLength field itself. 
InputOffset = EntryOffset + InitialLength + 4; continue; @@ -1953,7 +1945,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File, // fields that will get reconstructed by emitFDE(). unsigned FDERemainingBytes = InitialLength - (4 + AddrSize); TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize, - Loc + Range->second.Offset, + Loc + Range->second, FrameData.substr(InputOffset, FDERemainingBytes)); InputOffset += FDERemainingBytes; } diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp index e9e8be7fd008..1cb20c0bb948 100644 --- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp @@ -105,11 +105,7 @@ void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) { void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc, int64_t PcOffset) { - // Don't add empty ranges to the interval map. They are a problem because - // the interval map expects half open intervals. This is safe because they - // are empty anyway. - if (FuncHighPc != FuncLowPc) - Ranges.insert(FuncLowPc, FuncHighPc, PcOffset); + Ranges.insert({FuncLowPc, FuncHighPc}, PcOffset); this->LowPc = std::min(LowPc, FuncLowPc + PcOffset); this->HighPc = std::max(HighPc, FuncHighPc + PcOffset); } diff --git a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp index 55ff6b14f945..a00e51fcf135 100644 --- a/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/DWARFLinker/DWARFStreamer.cpp @@ -321,13 +321,14 @@ void DwarfStreamer::emitSwiftReflectionSection( /// sized addresses describing the ranges. 
void DwarfStreamer::emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, - const FunctionIntervals::const_iterator &FuncRange, + Optional<std::pair<AddressRange, int64_t>> FuncRange, const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) { MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection()); // Offset each range by the right amount. - int64_t PcOffset = Entries.empty() ? 0 : FuncRange.value() + UnitPcOffset; + int64_t PcOffset = + (Entries.empty() || !FuncRange) ? 0 : FuncRange->second + UnitPcOffset; for (const auto &Range : Entries) { if (Range.isBaseAddressSelectionEntry(AddressSize)) { warn("unsupported base address selection operation", @@ -339,8 +340,7 @@ void DwarfStreamer::emitRangesEntries( continue; // All range entries should lie in the function range. - if (!(Range.StartAddress + OrigLowPc >= FuncRange.start() && - Range.EndAddress + OrigLowPc <= FuncRange.stop())) + if (!FuncRange->first.contains(Range.StartAddress + OrigLowPc)) warn("inconsistent range data.", "emitting debug_ranges"); MS->emitIntValue(Range.StartAddress + PcOffset, AddressSize); MS->emitIntValue(Range.EndAddress + PcOffset, AddressSize); @@ -365,11 +365,13 @@ void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit, // IntervalMap will have coalesced the non-linked ranges, but here // we want to coalesce the linked addresses. 
std::vector<std::pair<uint64_t, uint64_t>> Ranges; - const auto &FunctionRanges = Unit.getFunctionRanges(); - for (auto Range = FunctionRanges.begin(), End = FunctionRanges.end(); - Range != End; ++Range) - Ranges.push_back(std::make_pair(Range.start() + Range.value(), - Range.stop() + Range.value())); + const RangesTy &FunctionRanges = Unit.getFunctionRanges(); + for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++) { + std::pair<AddressRange, int64_t> CurRange = FunctionRanges[Idx]; + + Ranges.push_back(std::make_pair(CurRange.first.start() + CurRange.second, + CurRange.first.end() + CurRange.second)); + } // The object addresses where sorted, but again, the linked // addresses might end up in a different order. diff --git a/contrib/llvm-project/llvm/lib/DWP/DWP.cpp b/contrib/llvm-project/llvm/lib/DWP/DWP.cpp index 34615a73e328..44e39c019e0c 100644 --- a/contrib/llvm-project/llvm/lib/DWP/DWP.cpp +++ b/contrib/llvm-project/llvm/lib/DWP/DWP.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/Object/Decompressor.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; @@ -273,12 +274,16 @@ static Error createError(StringRef Name, Error E) { static Error handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections, - StringRef &Name, StringRef &Contents) { - if (!Decompressor::isGnuStyle(Name)) + SectionRef Sec, StringRef Name, StringRef &Contents) { + auto *Obj = dyn_cast<ELFObjectFileBase>(Sec.getObject()); + if (!Obj || + !(static_cast<ELFSectionRef>(Sec).getFlags() & ELF::SHF_COMPRESSED)) return Error::success(); - - Expected<Decompressor> Dec = - Decompressor::create(Name, Contents, false /*IsLE*/, false /*Is64Bit*/); + bool IsLE = isa<object::ELF32LEObjectFile>(Obj) || + isa<object::ELF64LEObjectFile>(Obj); + bool Is64 = isa<object::ELF64LEObjectFile>(Obj) || + isa<object::ELF64BEObjectFile>(Obj); + Expected<Decompressor> Dec = 
Decompressor::create(Name, Contents, IsLE, Is64); if (!Dec) return createError(Name, Dec.takeError()); @@ -286,7 +291,6 @@ handleCompressedSection(std::deque<SmallString<32>> &UncompressedSections, if (Error E = Dec->resizeAndDecompress(UncompressedSections.back())) return createError(Name, std::move(E)); - Name = Name.substr(2); // Drop ".z" Contents = UncompressedSections.back(); return Error::success(); } @@ -494,7 +498,8 @@ Error handleSection( return ContentsOrErr.takeError(); StringRef Contents = *ContentsOrErr; - if (auto Err = handleCompressedSection(UncompressedSections, Name, Contents)) + if (auto Err = handleCompressedSection(UncompressedSections, Section, Name, + Contents)) return Err; Name = Name.substr(Name.find_first_not_of("._")); diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 27f63b9edcd0..7f4511258c64 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -36,8 +36,10 @@ using namespace llvm::codeview; namespace { #define error(X) \ - if (auto EC = X) \ - return EC; + do { \ + if (auto EC = X) \ + return EC; \ + } while (false) static const EnumEntry<TypeLeafKind> LeafTypeNames[] = { #define CV_TYPE(enum, val) {#enum, enum}, diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 2e567d8bc7ee..19d7d659a86a 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1645,7 +1645,7 @@ class DWARFObjInMemory final : public DWARFObject { /// provided by Data. Otherwise leaves it unchanged. 
Error maybeDecompress(const object::SectionRef &Sec, StringRef Name, StringRef &Data) { - if (!Decompressor::isCompressed(Sec)) + if (!Sec.isCompressed()) return Error::success(); Expected<Decompressor> Decompressor = diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 33856c12b3c9..d2ed4fe018b5 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -29,10 +29,6 @@ using namespace llvm; using namespace dwarf; -namespace llvm { -class DwarfContext; -} - using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; namespace { diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp index 9bc65e763287..aa8a89812227 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/Markup.cpp @@ -100,6 +100,9 @@ Optional<MarkupNode> MarkupParser::nextNode() { } void MarkupParser::flush() { + Buffer.clear(); + NextIdx = 0; + Line = {}; if (InProgressMultiline.empty()) return; FinishedMultiline.swap(InProgressMultiline); diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp index 3363fe5e531f..91a51485026e 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -10,14 +10,22 @@ /// This file defines the implementation of a filter that replaces symbolizer /// markup with human-readable expressions. 
/// +/// See https://llvm.org/docs/SymbolizerMarkupFormat.html +/// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/Debuginfod/Debuginfod.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -28,30 +36,195 @@ MarkupFilter::MarkupFilter(raw_ostream &OS, Optional<bool> ColorsEnabled) : OS(OS), ColorsEnabled(ColorsEnabled.value_or( WithColor::defaultAutoDetectFunction()(OS))) {} -void MarkupFilter::beginLine(StringRef Line) { +void MarkupFilter::filter(StringRef Line) { this->Line = Line; resetColor(); + + Parser.parseLine(Line); + SmallVector<MarkupNode> DeferredNodes; + // See if the line is a contextual (i.e. contains a contextual element). + // In this case, anything after the contextual element is elided, or the whole + // line may be elided. + while (Optional<MarkupNode> Node = Parser.nextNode()) { + // If this was a contextual line, then summarily stop processing. + if (tryContextualElement(*Node, DeferredNodes)) + return; + // This node may yet be part of an elided contextual line. + DeferredNodes.push_back(*Node); + } + + // This was not a contextual line, so nothing in it should be elided. 
+ endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); } -void MarkupFilter::filter(const MarkupNode &Node) { - if (!checkTag(Node)) - return; +void MarkupFilter::finish() { + Parser.flush(); + while (Optional<MarkupNode> Node = Parser.nextNode()) + filterNode(*Node); + endAnyModuleInfoLine(); + resetColor(); + Modules.clear(); + MMaps.clear(); +} - if (trySGR(Node)) - return; +// See if the given node is a contextual element and handle it if so. This may +// either output or defer the element; in the former case, it will first emit +// any DeferredNodes. +// +// Returns true if the given element was a contextual element. In this case, +// DeferredNodes should be considered handled and should not be emitted. The +// rest of the containing line must also be ignored in case the element was +// deferred to a following line. +bool MarkupFilter::tryContextualElement( + const MarkupNode &Node, const SmallVector<MarkupNode> &DeferredNodes) { + if (tryMMap(Node, DeferredNodes)) + return true; + if (tryReset(Node, DeferredNodes)) + return true; + return tryModule(Node, DeferredNodes); +} - if (Node.Tag == "symbol") { - if (!checkNumFields(Node, 1)) - return; +bool MarkupFilter::tryMMap(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "mmap") + return false; + Optional<MMap> ParsedMMap = parseMMap(Node); + if (!ParsedMMap) + return true; + + if (const MMap *M = overlappingMMap(*ParsedMMap)) { + WithColor::error(errs()) + << formatv("overlapping mmap: #{0:x} [{1:x},{2:x})\n", M->Mod->ID, + M->Addr, M->Addr + M->Size); + reportLocation(Node.Fields[0].begin()); + return true; + } + + auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap)); + assert(Res.second && "Overlap check should ensure emplace succeeds."); + MMap &MMap = Res.first->second; + + if (!MIL || MIL->Mod != MMap.Mod) { + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + 
beginModuleInfoLine(MMap.Mod); + OS << "; adds"; + } + MIL->MMaps.push_back(&MMap); + return true; +} + +bool MarkupFilter::tryReset(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "reset") + return false; + if (!checkNumFields(Node, 0)) + return true; + + if (!Modules.empty() || !MMaps.empty()) { + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); highlight(); - OS << llvm::demangle(Node.Fields.front().str()); + OS << "[[[reset]]]" << lineEnding(); restoreColor(); + + Modules.clear(); + MMaps.clear(); + } + return true; +} + +bool MarkupFilter::tryModule(const MarkupNode &Node, + const SmallVector<MarkupNode> &DeferredNodes) { + if (Node.Tag != "module") + return false; + Optional<Module> ParsedModule = parseModule(Node); + if (!ParsedModule) + return true; + + auto Res = Modules.try_emplace( + ParsedModule->ID, std::make_unique<Module>(std::move(*ParsedModule))); + if (!Res.second) { + WithColor::error(errs()) << "duplicate module ID\n"; + reportLocation(Node.Fields[0].begin()); + return true; + } + Module &Module = *Res.first->second; + + endAnyModuleInfoLine(); + for (const MarkupNode &Node : DeferredNodes) + filterNode(Node); + beginModuleInfoLine(&Module); + OS << "; BuildID="; + highlightValue(); + OS << toHex(Module.BuildID, /*LowerCase=*/true); + highlight(); + return true; +} + +void MarkupFilter::beginModuleInfoLine(const Module *M) { + highlight(); + OS << "[[[ELF module"; + highlightValue(); + OS << formatv(" #{0:x} \"{1}\"", M->ID, M->Name); + highlight(); + MIL = ModuleInfoLine{M}; +} + +void MarkupFilter::endAnyModuleInfoLine() { + if (!MIL) return; + llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) { + return A->Addr < B->Addr; + }); + for (const MMap *M : MIL->MMaps) { + OS << (M == MIL->MMaps.front() ? 
' ' : '-'); + highlightValue(); + OS << formatv("{0:x}", M->Addr); + highlight(); + OS << '('; + highlightValue(); + OS << M->Mode; + highlight(); + OS << ')'; } + OS << "]]]" << lineEnding(); + restoreColor(); + MIL.reset(); +} + +// Handle a node that is known not to be a contextual element. +void MarkupFilter::filterNode(const MarkupNode &Node) { + if (!checkTag(Node)) + return; + if (tryPresentation(Node)) + return; + if (trySGR(Node)) + return; OS << Node.Text; } +bool MarkupFilter::tryPresentation(const MarkupNode &Node) { + return trySymbol(Node); +} + +bool MarkupFilter::trySymbol(const MarkupNode &Node) { + if (Node.Tag != "symbol") + return false; + if (!checkNumFields(Node, 1)) + return true; + + highlight(); + OS << llvm::demangle(Node.Fields.front().str()); + restoreColor(); + return true; +} + bool MarkupFilter::trySGR(const MarkupNode &Node) { if (Node.Text == "\033[0m") { resetColor(); @@ -93,6 +266,13 @@ void MarkupFilter::highlight() { Bold); } +// Begin highlighting a field within a highlighted markup string. +void MarkupFilter::highlightValue() { + if (!ColorsEnabled) + return; + OS.changeColor(raw_ostream::Colors::GREEN, Bold); +} + // Set the output stream's color to the current color and bold state of the SGR // abstract machine. void MarkupFilter::restoreColor() { @@ -117,6 +297,139 @@ void MarkupFilter::resetColor() { OS.resetColor(); } +// This macro helps reduce the amount of indirection done through Optional +// below, since the usual case upon returning a None Optional is to return None. 
+#define ASSIGN_OR_RETURN_NONE(TYPE, NAME, EXPR) \ + auto NAME##Opt = (EXPR); \ + if (!NAME##Opt) \ + return None; \ + TYPE NAME = std::move(*NAME##Opt) + +Optional<MarkupFilter::Module> +MarkupFilter::parseModule(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[0])); + StringRef Name = Element.Fields[1]; + StringRef Type = Element.Fields[2]; + if (Type != "elf") { + WithColor::error() << "unknown module type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 4)) + return None; + ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID, + parseBuildID(Element.Fields[3])); + return Module{ID, Name.str(), std::move(BuildID)}; +} + +Optional<MarkupFilter::MMap> +MarkupFilter::parseMMap(const MarkupNode &Element) const { + if (!checkNumFieldsAtLeast(Element, 3)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, Addr, parseAddr(Element.Fields[0])); + ASSIGN_OR_RETURN_NONE(uint64_t, Size, parseSize(Element.Fields[1])); + StringRef Type = Element.Fields[2]; + if (Type != "load") { + WithColor::error() << "unknown mmap type\n"; + reportLocation(Type.begin()); + return None; + } + if (!checkNumFields(Element, 6)) + return None; + ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); + ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); + auto It = Modules.find(ID); + if (It == Modules.end()) { + WithColor::error() << "unknown module ID\n"; + reportLocation(Element.Fields[3].begin()); + return None; + } + ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, + parseAddr(Element.Fields[5])); + return MMap{Addr, Size, It->second.get(), std::move(Mode), + ModuleRelativeAddr}; +} + +// Parse an address (%p in the spec). 
+Optional<uint64_t> MarkupFilter::parseAddr(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "address"); + return None; + } + if (all_of(Str, [](char C) { return C == '0'; })) + return 0; + if (!Str.startswith("0x")) { + reportTypeError(Str, "address"); + return None; + } + uint64_t Addr; + if (Str.drop_front(2).getAsInteger(16, Addr)) { + reportTypeError(Str, "address"); + return None; + } + return Addr; +} + +// Parse a module ID (%i in the spec). +Optional<uint64_t> MarkupFilter::parseModuleID(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "module ID"); + return None; + } + return ID; +} + +// Parse a size (%i in the spec). +Optional<uint64_t> MarkupFilter::parseSize(StringRef Str) const { + uint64_t ID; + if (Str.getAsInteger(0, ID)) { + reportTypeError(Str, "size"); + return None; + } + return ID; +} + +// Parse a build ID (%x in the spec). +Optional<SmallVector<uint8_t>> MarkupFilter::parseBuildID(StringRef Str) const { + std::string Bytes; + if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) { + reportTypeError(Str, "build ID"); + return None; + } + ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()), + Bytes.size()); + return SmallVector<uint8_t>(BuildID.begin(), BuildID.end()); +} + +// Parses the mode string for an mmap element. +Optional<std::string> MarkupFilter::parseMode(StringRef Str) const { + if (Str.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Pop off each of r/R, w/W, and x/X from the front, in that order. + StringRef Remainder = Str; + if (!Remainder.empty() && tolower(Remainder.front()) == 'r') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'w') + Remainder = Remainder.drop_front(); + if (!Remainder.empty() && tolower(Remainder.front()) == 'x') + Remainder = Remainder.drop_front(); + + // If anything remains, then the string wasn't a mode. 
+ if (!Remainder.empty()) { + reportTypeError(Str, "mode"); + return None; + } + + // Normalize the mode. + return Str.lower(); +} + bool MarkupFilter::checkTag(const MarkupNode &Node) const { if (any_of(Node.Tag, [](char C) { return C < 'a' || C > 'z'; })) { WithColor::error(errs()) << "tags must be all lowercase characters\n"; @@ -126,18 +439,66 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const { return true; } -bool MarkupFilter::checkNumFields(const MarkupNode &Node, size_t Size) const { - if (Node.Fields.size() != Size) { +bool MarkupFilter::checkNumFields(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() != Size) { WithColor::error(errs()) << "expected " << Size << " fields; found " - << Node.Fields.size() << "\n"; - reportLocation(Node.Tag.end()); + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); return false; } return true; } +bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element, + size_t Size) const { + if (Element.Fields.size() < Size) { + WithColor::error(errs()) + << "expected at least " << Size << " fields; found " + << Element.Fields.size() << "\n"; + reportLocation(Element.Tag.end()); + return false; + } + return true; +} + +void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const { + WithColor::error(errs()) << "expected " << TypeName << "; found '" << Str + << "'\n"; + reportLocation(Str.begin()); +} + +// Prints two lines that point out the given location in the current Line using +// a caret. The iterator must be within the bounds of the most recent line +// passed to beginLine(). void MarkupFilter::reportLocation(StringRef::iterator Loc) const { errs() << Line; WithColor(errs().indent(Loc - Line.begin()), HighlightColor::String) << '^'; errs() << '\n'; } + +// Checks for an existing mmap that overlaps the given one and returns a +// pointer to one of them. 
+const MarkupFilter::MMap *MarkupFilter::overlappingMMap(const MMap &Map) const { + // If the given map contains the start of another mmap, they overlap. + auto I = MMaps.upper_bound(Map.Addr); + if (I != MMaps.end() && Map.contains(I->second.Addr)) + return &I->second; + + // If no element starts inside the given mmap, the only possible overlap would + // be if the preceding mmap contains the start point of the given mmap. + if (I != MMaps.begin()) { + --I; + if (I->second.contains(Map.Addr)) + return &I->second; + } + return nullptr; +} + +StringRef MarkupFilter::lineEnding() const { + return Line.endswith("\r\n") ? "\r\n" : "\n"; +} + +bool MarkupFilter::MMap::contains(uint64_t Addr) const { + return this->Addr <= Addr && Addr < this->Addr + Size; +} diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp index 43b9c2ba400b..dc07eaeaf615 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp @@ -524,4 +524,4 @@ COFFLinkGraphBuilder::exportCOMDATSymbol(COFFSymbolIndex SymIndex, } } // namespace jitlink -} // namespace llvm
\ No newline at end of file +} // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp index 0fc366bf505f..2a60d8206f63 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.cpp @@ -107,7 +107,7 @@ Error DWARFRecordSectionSplitter::processBlock( } uint64_t BlockSize = BlockReader.getOffset() - RecordStartOffset; - auto &NewBlock = G.splitBlock(B, BlockSize); + auto &NewBlock = G.splitBlock(B, BlockSize, &Cache); (void)NewBlock; LLVM_DEBUG(dbgs() << " Extracted " << NewBlock << "\n"); } diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index c60f4b3b263c..70a3c404d836 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -48,6 +48,10 @@ JITTargetMachineBuilder::createTargetMachine() { if (!TheTarget) return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()); + if (!TheTarget->hasJIT()) + return make_error<StringError>("Target has no JIT support", + inconvertibleErrorCode()); + auto *TM = TheTarget->createTargetMachine(TT.getTriple(), CPU, Features.getString(), Options, RM, CM, OptLevel, /*JIT*/ true); diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp new file mode 100644 index 000000000000..c2e7baabb994 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -0,0 +1,135 @@ +//=== MapperJITLinkMemoryManager.cpp - Memory management with MemoryMapper ===// +// +// 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h" + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Support/Process.h" + +#include <limits> + +using namespace llvm::jitlink; + +namespace llvm { +namespace orc { + +class MapperJITLinkMemoryManager::InFlightAlloc + : public JITLinkMemoryManager::InFlightAlloc { +public: + InFlightAlloc(MapperJITLinkMemoryManager &Parent, LinkGraph &G, + ExecutorAddr AllocAddr, + std::vector<MemoryMapper::AllocInfo::SegInfo> Segs) + : Parent(Parent), G(G), AllocAddr(AllocAddr), Segs(std::move(Segs)) {} + + void finalize(OnFinalizedFunction OnFinalize) override { + MemoryMapper::AllocInfo AI; + AI.MappingBase = AllocAddr; + + std::swap(AI.Segments, Segs); + std::swap(AI.Actions, G.allocActions()); + + Parent.Mapper->initialize(AI, [&](Expected<ExecutorAddr> Result) { + if (!Result) { + OnFinalize(Result.takeError()); + return; + } + + OnFinalize(FinalizedAlloc(*Result)); + }); + } + + void abandon(OnAbandonedFunction OnFinalize) override { + Parent.Mapper->release({AllocAddr}, std::move(OnFinalize)); + } + +private: + MapperJITLinkMemoryManager &Parent; + LinkGraph &G; + ExecutorAddr AllocAddr; + std::vector<MemoryMapper::AllocInfo::SegInfo> Segs; +}; + +MapperJITLinkMemoryManager::MapperJITLinkMemoryManager( + std::unique_ptr<MemoryMapper> Mapper) + : Mapper(std::move(Mapper)) {} + +void MapperJITLinkMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) { + BasicLayout BL(G); + + // find required address space + auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(Mapper->getPageSize()); + if (!SegsSizes) { + OnAllocated(SegsSizes.takeError()); + return; + } + + // Check if 
total size fits in address space + if (SegsSizes->total() > std::numeric_limits<size_t>::max()) { + OnAllocated(make_error<JITLinkError>( + formatv("Total requested size {:x} for graph {} exceeds address space", + SegsSizes->total(), G.getName()))); + return; + } + + Mapper->reserve( + SegsSizes->total(), + [this, &G, BL = std::move(BL), OnAllocated = std::move(OnAllocated)]( + Expected<ExecutorAddrRange> Result) mutable { + if (!Result) { + return OnAllocated(Result.takeError()); + } + + auto NextSegAddr = Result->Start; + + std::vector<MemoryMapper::AllocInfo::SegInfo> SegInfos; + + for (auto &KV : BL.segments()) { + auto &AG = KV.first; + auto &Seg = KV.second; + + auto TotalSize = Seg.ContentSize + Seg.ZeroFillSize; + + Seg.Addr = NextSegAddr; + Seg.WorkingMem = Mapper->prepare(NextSegAddr, TotalSize); + + NextSegAddr += alignTo(TotalSize, Mapper->getPageSize()); + + MemoryMapper::AllocInfo::SegInfo SI; + SI.Offset = Seg.Addr - Result->Start; + SI.ContentSize = Seg.ContentSize; + SI.ZeroFillSize = Seg.ZeroFillSize; + SI.Prot = (toSysMemoryProtectionFlags(AG.getMemProt())); + SI.WorkingMem = Seg.WorkingMem; + + SegInfos.push_back(SI); + } + + if (auto Err = BL.apply()) { + OnAllocated(std::move(Err)); + return; + } + + OnAllocated(std::make_unique<InFlightAlloc>(*this, G, Result->Start, + std::move(SegInfos))); + }); +} + +void MapperJITLinkMemoryManager::deallocate( + std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) { + std::vector<ExecutorAddr> Bases; + Bases.reserve(Allocs.size()); + for (auto &FA : Allocs) { + Bases.push_back(FA.getAddress()); + FA.release(); + } + Mapper->release(Bases, std::move(OnDeallocated)); +} + +} // end namespace orc +} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index 8b3fbd7117e2..ca3f64b8a409 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ 
b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -8,11 +8,33 @@ #include "llvm/ExecutionEngine/Orc/MemoryMapper.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" +#include "llvm/Support/WindowsError.h" + +#if defined(LLVM_ON_UNIX) +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#elif defined(_WIN32) +#include <windows.h> +#endif + namespace llvm { namespace orc { MemoryMapper::~MemoryMapper() {} +InProcessMemoryMapper::InProcessMemoryMapper(size_t PageSize) + : PageSize(PageSize) {} + +Expected<std::unique_ptr<InProcessMemoryMapper>> +InProcessMemoryMapper::Create() { + auto PageSize = sys::Process::getPageSize(); + if (!PageSize) + return PageSize.takeError(); + return std::make_unique<InProcessMemoryMapper>(*PageSize); +} + void InProcessMemoryMapper::reserve(size_t NumBytes, OnReservedFunction OnReserved) { std::error_code EC; @@ -147,6 +169,238 @@ InProcessMemoryMapper::~InProcessMemoryMapper() { cantFail(F.get()); } +// SharedMemoryMapper + +SharedMemoryMapper::SharedMemoryMapper(ExecutorProcessControl &EPC, + SymbolAddrs SAs, size_t PageSize) + : EPC(EPC), SAs(SAs), PageSize(PageSize) {} + +Expected<std::unique_ptr<SharedMemoryMapper>> +SharedMemoryMapper::Create(ExecutorProcessControl &EPC, SymbolAddrs SAs) { + auto PageSize = sys::Process::getPageSize(); + if (!PageSize) + return PageSize.takeError(); + + return std::make_unique<SharedMemoryMapper>(EPC, SAs, *PageSize); +} + +void SharedMemoryMapper::reserve(size_t NumBytes, + OnReservedFunction OnReserved) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>( + SAs.Reserve, + [this, NumBytes, OnReserved = std::move(OnReserved)]( + Error SerializationErr, + Expected<std::pair<ExecutorAddr, std::string>> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnReserved(std::move(SerializationErr)); + } + + if (!Result) + return 
OnReserved(Result.takeError()); + + ExecutorAddr RemoteAddr; + std::string SharedMemoryName; + std::tie(RemoteAddr, SharedMemoryName) = std::move(*Result); + + void *LocalAddr = nullptr; + +#if defined(LLVM_ON_UNIX) + + int SharedMemoryFile = shm_open(SharedMemoryName.c_str(), O_RDWR, 0700); + if (SharedMemoryFile < 0) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + + // this prevents other processes from accessing it by name + shm_unlink(SharedMemoryName.c_str()); + + LocalAddr = mmap(nullptr, NumBytes, PROT_READ | PROT_WRITE, MAP_SHARED, + SharedMemoryFile, 0); + if (LocalAddr == MAP_FAILED) { + return OnReserved(errorCodeToError( + std::error_code(errno, std::generic_category()))); + } + + close(SharedMemoryFile); + +#elif defined(_WIN32) + + std::wstring WideSharedMemoryName(SharedMemoryName.begin(), + SharedMemoryName.end()); + HANDLE SharedMemoryFile = OpenFileMappingW( + FILE_MAP_ALL_ACCESS, FALSE, WideSharedMemoryName.c_str()); + if (!SharedMemoryFile) + return OnReserved(errorCodeToError(mapWindowsError(GetLastError()))); + + LocalAddr = + MapViewOfFile(SharedMemoryFile, FILE_MAP_ALL_ACCESS, 0, 0, 0); + if (!LocalAddr) { + CloseHandle(SharedMemoryFile); + return OnReserved(errorCodeToError(mapWindowsError(GetLastError()))); + } + + CloseHandle(SharedMemoryFile); + +#endif + { + std::lock_guard<std::mutex> Lock(Mutex); + Reservations.insert({RemoteAddr, {LocalAddr, NumBytes}}); + } + + OnReserved(ExecutorAddrRange(RemoteAddr, NumBytes)); + }, + SAs.Instance, static_cast<uint64_t>(NumBytes)); + +#else + OnReserved(make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode())); +#endif +} + +char *SharedMemoryMapper::prepare(ExecutorAddr Addr, size_t ContentSize) { + auto R = Reservations.upper_bound(Addr); + assert(R != Reservations.begin() && "Attempt to prepare unknown range"); + R--; + + ExecutorAddrDiff Offset = Addr - R->first; + + return 
static_cast<char *>(R->second.LocalAddr) + Offset; +} + +void SharedMemoryMapper::initialize(MemoryMapper::AllocInfo &AI, + OnInitializedFunction OnInitialized) { + auto Reservation = Reservations.find(AI.MappingBase); + assert(Reservation != Reservations.end() && + "Attempt to initialize unreserved range"); + + tpctypes::SharedMemoryFinalizeRequest FR; + + AI.Actions.swap(FR.Actions); + + FR.Segments.reserve(AI.Segments.size()); + + for (auto Segment : AI.Segments) { + char *Base = + static_cast<char *>(Reservation->second.LocalAddr) + Segment.Offset; + std::memset(Base + Segment.ContentSize, 0, Segment.ZeroFillSize); + + tpctypes::SharedMemorySegFinalizeRequest SegReq; + SegReq.Prot = tpctypes::toWireProtectionFlags( + static_cast<sys::Memory::ProtectionFlags>(Segment.Prot)); + SegReq.Addr = AI.MappingBase + Segment.Offset; + SegReq.Size = Segment.ContentSize + Segment.ZeroFillSize; + + FR.Segments.push_back(SegReq); + } + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>( + SAs.Initialize, + [OnInitialized = std::move(OnInitialized)]( + Error SerializationErr, Expected<ExecutorAddr> Result) mutable { + if (SerializationErr) { + cantFail(Result.takeError()); + return OnInitialized(std::move(SerializationErr)); + } + + OnInitialized(std::move(Result)); + }, + SAs.Instance, AI.MappingBase, std::move(FR)); +} + +void SharedMemoryMapper::deinitialize( + ArrayRef<ExecutorAddr> Allocations, + MemoryMapper::OnDeinitializedFunction OnDeinitialized) { + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>( + SAs.Deinitialize, + [OnDeinitialized = std::move(OnDeinitialized)](Error SerializationErr, + Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnDeinitialized(std::move(SerializationErr)); + } + + OnDeinitialized(std::move(Result)); + }, + SAs.Instance, Allocations); +} + +void SharedMemoryMapper::release(ArrayRef<ExecutorAddr> Bases, + 
OnReleasedFunction OnReleased) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + Error Err = Error::success(); + + { + std::lock_guard<std::mutex> Lock(Mutex); + + for (auto Base : Bases) { + +#if defined(LLVM_ON_UNIX) + + if (munmap(Reservations[Base].LocalAddr, Reservations[Base].Size) != 0) + Err = joinErrors(std::move(Err), errorCodeToError(std::error_code( + errno, std::generic_category()))); + +#elif defined(_WIN32) + + if (!UnmapViewOfFile(Reservations[Base].LocalAddr)) + joinErrors(std::move(Err), + errorCodeToError(mapWindowsError(GetLastError()))); + +#endif + + Reservations.erase(Base); + } + } + + EPC.callSPSWrapperAsync< + rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>( + SAs.Release, + [OnReleased = std::move(OnReleased), + Err = std::move(Err)](Error SerializationErr, Error Result) mutable { + if (SerializationErr) { + cantFail(std::move(Result)); + return OnReleased( + joinErrors(std::move(Err), std::move(SerializationErr))); + } + + return OnReleased(joinErrors(std::move(Err), std::move(Result))); + }, + SAs.Instance, Bases); +#else + OnReleased(make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode())); +#endif +} + +SharedMemoryMapper::~SharedMemoryMapper() { + std::vector<ExecutorAddr> ReservationAddrs; + if (!Reservations.empty()) { + std::lock_guard<std::mutex> Lock(Mutex); + { + ReservationAddrs.reserve(Reservations.size()); + for (const auto &R : Reservations) { + ReservationAddrs.push_back(R.first); + } + } + } + + std::promise<MSVCPError> P; + auto F = P.get_future(); + release(ReservationAddrs, [&](Error Err) { P.set_value(std::move(Err)); }); + // FIXME: Release can actually fail. The error should be propagated. + // Meanwhile, a better option is to explicitly call release(). 
+ cantFail(F.get()); +} + } // namespace orc } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp index 5eae33121eb9..dfdd846c46a7 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp @@ -18,6 +18,7 @@ const char *SimpleExecutorDylibManagerOpenWrapperName = "__llvm_orc_SimpleExecutorDylibManager_open_wrapper"; const char *SimpleExecutorDylibManagerLookupWrapperName = "__llvm_orc_SimpleExecutorDylibManager_lookup_wrapper"; + const char *SimpleExecutorMemoryManagerInstanceName = "__llvm_orc_SimpleExecutorMemoryManager_Instance"; const char *SimpleExecutorMemoryManagerReserveWrapperName = @@ -26,6 +27,18 @@ const char *SimpleExecutorMemoryManagerFinalizeWrapperName = "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper"; const char *SimpleExecutorMemoryManagerDeallocateWrapperName = "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper"; + +const char *ExecutorSharedMemoryMapperServiceInstanceName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Instance"; +const char *ExecutorSharedMemoryMapperServiceReserveWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Reserve"; +const char *ExecutorSharedMemoryMapperServiceInitializeWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Initialize"; +const char *ExecutorSharedMemoryMapperServiceDeinitializeWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Deinitialize"; +const char *ExecutorSharedMemoryMapperServiceReleaseWrapperName = + "__llvm_orc_ExecutorSharedMemoryMapperService_Release"; + const char *MemoryWriteUInt8sWrapperName = "__llvm_orc_bootstrap_mem_write_uint8s_wrapper"; const char *MemoryWriteUInt16sWrapperName = @@ -36,10 +49,12 @@ const char *MemoryWriteUInt64sWrapperName = "__llvm_orc_bootstrap_mem_write_uint64s_wrapper"; const char 
*MemoryWriteBuffersWrapperName = "__llvm_orc_bootstrap_mem_write_buffers_wrapper"; + const char *RegisterEHFrameSectionWrapperName = "__llvm_orc_bootstrap_register_ehframe_section_wrapper"; const char *DeregisterEHFrameSectionWrapperName = "__llvm_orc_bootstrap_deregister_ehframe_section_wrapper"; + const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper"; } // end namespace rt diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp new file mode 100644 index 000000000000..6c9f099061ae --- /dev/null +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -0,0 +1,341 @@ +//===---------- ExecutorSharedMemoryMapperService.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h" + +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/WindowsError.h" + +#include <sstream> + +#if defined(LLVM_ON_UNIX) +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <unistd.h> +#endif + +#if defined(_WIN32) +static DWORD getWindowsProtectionFlags(unsigned Flags) { + switch (Flags & llvm::sys::Memory::MF_RWE_MASK) { + case llvm::sys::Memory::MF_READ: + return PAGE_READONLY; + case llvm::sys::Memory::MF_WRITE: + // Note: PAGE_WRITE is not supported by VirtualProtect + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE: + return PAGE_READWRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READ; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE | + llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE_READWRITE; + case llvm::sys::Memory::MF_EXEC: + return PAGE_EXECUTE; + default: + llvm_unreachable("Illegal memory protection flag specified!"); + } + // Provide a default return value as required by some compilers. 
+ return PAGE_NOACCESS; +} +#endif + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +Expected<std::pair<ExecutorAddr, std::string>> +ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + +#if defined(LLVM_ON_UNIX) + + std::string SharedMemoryName; + { + std::stringstream SharedMemoryNameStream; + SharedMemoryNameStream << "/jitlink_" << sys::Process::getProcessId() << '_' + << (++SharedMemoryCount); + SharedMemoryName = SharedMemoryNameStream.str(); + } + + int SharedMemoryFile = + shm_open(SharedMemoryName.c_str(), O_RDWR | O_CREAT | O_EXCL, 0700); + if (SharedMemoryFile < 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + // by default size is 0 + if (ftruncate(SharedMemoryFile, Size) < 0) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + void *Addr = mmap(nullptr, Size, PROT_NONE, MAP_SHARED, SharedMemoryFile, 0); + if (Addr == MAP_FAILED) + return errorCodeToError(std::error_code(errno, std::generic_category())); + + close(SharedMemoryFile); + +#elif defined(_WIN32) + + std::string SharedMemoryName; + { + std::stringstream SharedMemoryNameStream; + SharedMemoryNameStream << "jitlink_" << sys::Process::getProcessId() << '_' + << (++SharedMemoryCount); + SharedMemoryName = SharedMemoryNameStream.str(); + } + + std::wstring WideSharedMemoryName(SharedMemoryName.begin(), + SharedMemoryName.end()); + HANDLE SharedMemoryFile = CreateFileMappingW( + INVALID_HANDLE_VALUE, NULL, PAGE_EXECUTE_READWRITE, Size >> 32, + Size & 0xffffffff, WideSharedMemoryName.c_str()); + if (!SharedMemoryFile) + return errorCodeToError(mapWindowsError(GetLastError())); + + void *Addr = MapViewOfFile(SharedMemoryFile, + FILE_MAP_ALL_ACCESS | FILE_MAP_EXECUTE, 0, 0, 0); + if (!Addr) { + CloseHandle(SharedMemoryFile); + return errorCodeToError(mapWindowsError(GetLastError())); + } + +#endif + + { + std::lock_guard<std::mutex> Lock(Mutex); + Reservations[Addr].Size 
= Size; +#if defined(_WIN32) + Reservations[Addr].SharedMemoryFile = SharedMemoryFile; +#endif + } + + return std::make_pair(ExecutorAddr::fromPtr(Addr), + std::move(SharedMemoryName)); +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( + ExecutorAddr Reservation, tpctypes::SharedMemoryFinalizeRequest &FR) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + + ExecutorAddr MinAddr(~0ULL); + + // Contents are already in place + for (auto &Segment : FR.Segments) { + if (Segment.Addr < MinAddr) + MinAddr = Segment.Addr; + +#if defined(LLVM_ON_UNIX) + + int NativeProt = 0; + if (Segment.Prot & tpctypes::WPF_Read) + NativeProt |= PROT_READ; + if (Segment.Prot & tpctypes::WPF_Write) + NativeProt |= PROT_WRITE; + if (Segment.Prot & tpctypes::WPF_Exec) + NativeProt |= PROT_EXEC; + + if (mprotect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt)) + return errorCodeToError(std::error_code(errno, std::generic_category())); + +#elif defined(_WIN32) + + DWORD NativeProt = + getWindowsProtectionFlags(fromWireProtectionFlags(Segment.Prot)); + + if (!VirtualProtect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt, + &NativeProt)) + return errorCodeToError(mapWindowsError(GetLastError())); + +#endif + + if (Segment.Prot & tpctypes::WPF_Exec) + sys::Memory::InvalidateInstructionCache(Segment.Addr.toPtr<void *>(), + Segment.Size); + } + + // Run finalization actions and get deinitlization action list. 
+ auto DeinitializeActions = shared::runFinalizeActions(FR.Actions); + if (!DeinitializeActions) { + return DeinitializeActions.takeError(); + } + + { + std::lock_guard<std::mutex> Lock(Mutex); + Allocations[MinAddr].DeinitializationActions = + std::move(*DeinitializeActions); + Reservations[Reservation.toPtr<void *>()].Allocations.push_back(MinAddr); + } + + return MinAddr; + +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Error ExecutorSharedMemoryMapperService::deinitialize( + const std::vector<ExecutorAddr> &Bases) { + Error AllErr = Error::success(); + + { + std::lock_guard<std::mutex> Lock(Mutex); + + for (auto Base : Bases) { + if (Error Err = shared::runDeallocActions( + Allocations[Base].DeinitializationActions)) { + AllErr = joinErrors(std::move(AllErr), std::move(Err)); + } + + Allocations.erase(Base); + } + } + + return AllErr; +} + +Error ExecutorSharedMemoryMapperService::release( + const std::vector<ExecutorAddr> &Bases) { +#if defined(LLVM_ON_UNIX) || defined(_WIN32) + Error Err = Error::success(); + + for (auto Base : Bases) { + std::vector<ExecutorAddr> AllocAddrs; + size_t Size; + +#if defined(_WIN32) + HANDLE SharedMemoryFile; +#endif + + { + std::lock_guard<std::mutex> Lock(Mutex); + auto &R = Reservations[Base.toPtr<void *>()]; + Size = R.Size; + +#if defined(_WIN32) + SharedMemoryFile = R.SharedMemoryFile; +#endif + + AllocAddrs.swap(R.Allocations); + } + + // deinitialize sub allocations + if (Error E = deinitialize(AllocAddrs)) + Err = joinErrors(std::move(Err), std::move(E)); + +#if defined(LLVM_ON_UNIX) + + if (munmap(Base.toPtr<void *>(), Size) != 0) + Err = joinErrors(std::move(Err), errorCodeToError(std::error_code( + errno, std::generic_category()))); + +#elif defined(_WIN32) + + if (!UnmapViewOfFile(Base.toPtr<void *>())) + Err = joinErrors(std::move(Err), + errorCodeToError(mapWindowsError(GetLastError()))); + + 
CloseHandle(SharedMemoryFile); + +#endif + + std::lock_guard<std::mutex> Lock(Mutex); + Reservations.erase(Base.toPtr<void *>()); + } + + return Err; +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif +} + +Error ExecutorSharedMemoryMapperService::shutdown() { + std::vector<ExecutorAddr> ReservationAddrs; + if (!Reservations.empty()) { + std::lock_guard<std::mutex> Lock(Mutex); + { + ReservationAddrs.reserve(Reservations.size()); + for (const auto &R : Reservations) { + ReservationAddrs.push_back(ExecutorAddr::fromPtr(R.getFirst())); + } + } + } + return release(ReservationAddrs); + + return Error::success(); +} + +void ExecutorSharedMemoryMapperService::addBootstrapSymbols( + StringMap<ExecutorAddr> &M) { + M[rt::ExecutorSharedMemoryMapperServiceInstanceName] = + ExecutorAddr::fromPtr(this); + M[rt::ExecutorSharedMemoryMapperServiceReserveWrapperName] = + ExecutorAddr::fromPtr(&reserveWrapper); + M[rt::ExecutorSharedMemoryMapperServiceInitializeWrapperName] = + ExecutorAddr::fromPtr(&initializeWrapper); + M[rt::ExecutorSharedMemoryMapperServiceDeinitializeWrapperName] = + ExecutorAddr::fromPtr(&deinitializeWrapper); + M[rt::ExecutorSharedMemoryMapperServiceReleaseWrapperName] = + ExecutorAddr::fromPtr(&releaseWrapper); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::reserveWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::reserve)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::initializeWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceInitializeSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + 
&ExecutorSharedMemoryMapperService::initialize)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::deinitializeWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceDeinitializeSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::deinitialize)) + .release(); +} + +llvm::orc::shared::CWrapperFunctionResult +ExecutorSharedMemoryMapperService::releaseWrapper(const char *ArgData, + size_t ArgSize) { + return shared::WrapperFunction< + rt::SPSExecutorSharedMemoryMapperServiceReleaseSignature>:: + handle(ArgData, ArgSize, + shared::makeMethodWrapperHandler( + &ExecutorSharedMemoryMapperService::release)) + .release(); +} + +} // namespace rt_bootstrap +} // end namespace orc +} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp index 7cadf3bb51a7..c848dd65fa7e 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp @@ -126,7 +126,8 @@ Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) { inconvertibleErrorCode())); char *Mem = Seg.Addr.toPtr<char *>(); - memcpy(Mem, Seg.Content.data(), Seg.Content.size()); + if (!Seg.Content.empty()) + memcpy(Mem, Seg.Content.data(), Seg.Content.size()); memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size()); assert(Seg.Size <= std::numeric_limits<size_t>::max()); if (auto EC = sys::Memory::protectMappedMemory( diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 574d9174bebf..cee4cddab5e8 100644 --- 
a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1453,7 +1453,36 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); BodyGenCB(TaskAllocaIP, TaskBodyIP); - Builder.SetInsertPoint(TaskExitBB); + Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin()); + + return Builder.saveIP(); +} + +OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + Value *ThreadID = getOrCreateThreadID(Ident); + + // Emit the @__kmpc_taskgroup runtime call to start the taskgroup + Function *TaskgroupFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup); + Builder.CreateCall(TaskgroupFn, {Ident, ThreadID}); + + BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit"); + BodyGenCB(AllocaIP, Builder.saveIP()); + + Builder.SetInsertPoint(TaskgroupExitBB); + // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup + Function *EndTaskgroupFn = + getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup); + Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID}); return Builder.saveIP(); } diff --git a/contrib/llvm-project/llvm/lib/IR/Function.cpp b/contrib/llvm-project/llvm/lib/IR/Function.cpp index 53df94366760..d4138133721e 100644 --- a/contrib/llvm-project/llvm/lib/IR/Function.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Function.cpp @@ -354,6 +354,8 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty, B.addAttribute("frame-pointer", "all"); break; } + if (M->getModuleFlag("function_return_thunk_extern")) + 
B.addAttribute(Attribute::FnRetThunkExtern); F->addFnAttrs(B); return F; } diff --git a/contrib/llvm-project/llvm/lib/IR/GCStrategy.cpp b/contrib/llvm-project/llvm/lib/IR/GCStrategy.cpp index f3bc5b74f8fd..5833dc26c57e 100644 --- a/contrib/llvm-project/llvm/lib/IR/GCStrategy.cpp +++ b/contrib/llvm-project/llvm/lib/IR/GCStrategy.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/GCStrategy.h" +#include "llvm/ADT/Twine.h" using namespace llvm; @@ -32,7 +33,7 @@ std::unique_ptr<GCStrategy> llvm::getGCStrategy(const StringRef Name) { const std::string error = std::string("unsupported GC: ") + Name.str() + " (did you remember to link and initialize the library?)"; - report_fatal_error(error); + report_fatal_error(Twine(error)); } else - report_fatal_error(std::string("unsupported GC: ") + Name.str()); + report_fatal_error(Twine(std::string("unsupported GC: ") + Name.str())); } diff --git a/contrib/llvm-project/llvm/lib/IR/Globals.cpp b/contrib/llvm-project/llvm/lib/IR/Globals.cpp index 3265050261c8..51a22897babd 100644 --- a/contrib/llvm-project/llvm/lib/IR/Globals.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Globals.cpp @@ -316,32 +316,38 @@ bool GlobalObject::canIncreaseAlignment() const { return true; } +template <typename Operation> static const GlobalObject * -findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) { - if (auto *GO = dyn_cast<GlobalObject>(C)) +findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases, + const Operation &Op) { + if (auto *GO = dyn_cast<GlobalObject>(C)) { + Op(*GO); return GO; - if (auto *GA = dyn_cast<GlobalAlias>(C)) + } + if (auto *GA = dyn_cast<GlobalAlias>(C)) { + Op(*GA); if (Aliases.insert(GA).second) - return findBaseObject(GA->getOperand(0), Aliases); + return findBaseObject(GA->getOperand(0), Aliases, Op); + } if (auto *CE = dyn_cast<ConstantExpr>(C)) { switch (CE->getOpcode()) { case Instruction::Add: { - auto *LHS = 
findBaseObject(CE->getOperand(0), Aliases); - auto *RHS = findBaseObject(CE->getOperand(1), Aliases); + auto *LHS = findBaseObject(CE->getOperand(0), Aliases, Op); + auto *RHS = findBaseObject(CE->getOperand(1), Aliases, Op); if (LHS && RHS) return nullptr; return LHS ? LHS : RHS; } case Instruction::Sub: { - if (findBaseObject(CE->getOperand(1), Aliases)) + if (findBaseObject(CE->getOperand(1), Aliases, Op)) return nullptr; - return findBaseObject(CE->getOperand(0), Aliases); + return findBaseObject(CE->getOperand(0), Aliases, Op); } case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::BitCast: case Instruction::GetElementPtr: - return findBaseObject(CE->getOperand(0), Aliases); + return findBaseObject(CE->getOperand(0), Aliases, Op); default: break; } @@ -351,7 +357,7 @@ findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) { const GlobalObject *GlobalValue::getAliaseeObject() const { DenseSet<const GlobalAlias *> Aliases; - return findBaseObject(this, Aliases); + return findBaseObject(this, Aliases, [](const GlobalValue &) {}); } bool GlobalValue::isAbsoluteSymbolRef() const { @@ -544,7 +550,7 @@ void GlobalAlias::setAliasee(Constant *Aliasee) { const GlobalObject *GlobalAlias::getAliaseeObject() const { DenseSet<const GlobalAlias *> Aliases; - return findBaseObject(getOperand(0), Aliases); + return findBaseObject(getOperand(0), Aliases, [](const GlobalValue &) {}); } //===----------------------------------------------------------------------===// @@ -577,5 +583,12 @@ void GlobalIFunc::eraseFromParent() { const Function *GlobalIFunc::getResolverFunction() const { DenseSet<const GlobalAlias *> Aliases; - return dyn_cast<Function>(findBaseObject(getResolver(), Aliases)); + return dyn_cast<Function>( + findBaseObject(getResolver(), Aliases, [](const GlobalValue &) {})); +} + +void GlobalIFunc::applyAlongResolverPath( + function_ref<void(const GlobalValue &)> Op) const { + DenseSet<const GlobalAlias *> Aliases; + 
findBaseObject(getResolver(), Aliases, Op); } diff --git a/contrib/llvm-project/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm-project/llvm/lib/IR/InlineAsm.cpp index c75b1aa7c1d6..088fcfdec742 100644 --- a/contrib/llvm-project/llvm/lib/IR/InlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/IR/InlineAsm.cpp @@ -93,6 +93,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, } else if (*I == '=') { ++I; Type = isOutput; + } else if (*I == '!') { + ++I; + Type = isLabel; } if (*I == '*') { @@ -265,14 +268,14 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { return makeStringError("failed to parse constraints"); unsigned NumOutputs = 0, NumInputs = 0, NumClobbers = 0; - unsigned NumIndirect = 0; + unsigned NumIndirect = 0, NumLabels = 0; for (const ConstraintInfo &Constraint : Constraints) { switch (Constraint.Type) { case InlineAsm::isOutput: - if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0) - return makeStringError("output constraint occurs after input " - "or clobber constraint"); + if ((NumInputs-NumIndirect) != 0 || NumClobbers != 0 || NumLabels != 0) + return makeStringError("output constraint occurs after input, " + "clobber or label constraint"); if (!Constraint.isIndirect) { ++NumOutputs; @@ -289,6 +292,13 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { case InlineAsm::isClobber: ++NumClobbers; break; + case InlineAsm::isLabel: + if (NumClobbers) + return makeStringError("label constraint occurs after clobber " + "constraint"); + + ++NumLabels; + break; } } @@ -312,5 +322,7 @@ Error InlineAsm::verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->getNumParams() != NumInputs) return makeStringError("number of input constraints does not match number " "of parameters"); + + // We don't have access to labels here, NumLabels will be checked separately. 
return Error::success(); } diff --git a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp index b333f40f3ce9..26171f537244 100644 --- a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp @@ -960,15 +960,10 @@ void CallBrInst::init(FunctionType *FTy, Value *Fn, BasicBlock *Fallthrough, setName(NameStr); } -void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) { - assert(getNumIndirectDests() > i && "IndirectDest # out of range for callbr"); - if (BasicBlock *OldBB = getIndirectDest(i)) { - BlockAddress *Old = BlockAddress::get(OldBB); - BlockAddress *New = BlockAddress::get(B); - for (unsigned ArgNo = 0, e = arg_size(); ArgNo != e; ++ArgNo) - if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old) - setArgOperand(ArgNo, New); - } +BlockAddress * +CallBrInst::getBlockAddressForIndirectDest(unsigned DestNo) const { + return BlockAddress::get(const_cast<Function *>(getFunction()), + getIndirectDest(DestNo)); } CallBrInst::CallBrInst(const CallBrInst &CBI) diff --git a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp index 65a9a32ad2c5..c50d6901c9da 100644 --- a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp +++ b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp @@ -694,8 +694,10 @@ unsigned BinaryOpIntrinsic::getNoWrapKind() const { return OverflowingBinaryOperator::NoUnsignedWrap; } -const GCStatepointInst *GCProjectionInst::getStatepoint() const { +const Value *GCProjectionInst::getStatepoint() const { const Value *Token = getArgOperand(0); + if (isa<UndefValue>(Token)) + return Token; // This takes care both of relocates for call statepoints and relocates // on normal path of invoke statepoint. 
@@ -714,13 +716,23 @@ const GCStatepointInst *GCProjectionInst::getStatepoint() const { } Value *GCRelocateInst::getBasePtr() const { - if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live)) + auto Statepoint = getStatepoint(); + if (isa<UndefValue>(Statepoint)) + return UndefValue::get(Statepoint->getType()); + + auto *GCInst = cast<GCStatepointInst>(Statepoint); + if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live)) return *(Opt->Inputs.begin() + getBasePtrIndex()); - return *(getStatepoint()->arg_begin() + getBasePtrIndex()); + return *(GCInst->arg_begin() + getBasePtrIndex()); } Value *GCRelocateInst::getDerivedPtr() const { - if (auto Opt = getStatepoint()->getOperandBundle(LLVMContext::OB_gc_live)) + auto *Statepoint = getStatepoint(); + if (isa<UndefValue>(Statepoint)) + return UndefValue::get(Statepoint->getType()); + + auto *GCInst = cast<GCStatepointInst>(Statepoint); + if (auto Opt = GCInst->getOperandBundle(LLVMContext::OB_gc_live)) return *(Opt->Inputs.begin() + getDerivedPtrIndex()); - return *(getStatepoint()->arg_begin() + getDerivedPtrIndex()); + return *(GCInst->arg_begin() + getDerivedPtrIndex()); } diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp index 75d02f4c8c82..e3ea256af16d 100644 --- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp @@ -469,6 +469,9 @@ private: void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty); void visitDereferenceableMetadata(Instruction &I, MDNode *MD); void visitProfMetadata(Instruction &I, MDNode *MD); + void visitCallStackMetadata(MDNode *MD); + void visitMemProfMetadata(Instruction &I, MDNode *MD); + void visitCallsiteMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -1624,8 +1627,10 @@ Verifier::visitModuleFlag(const MDNode 
*Op, break; case Module::Min: { - Check(mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)), - "invalid value for 'min' module flag (expected constant integer)", + auto *V = mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(2)); + Check(V && V->getValue().isNonNegative(), + "invalid value for 'min' module flag (expected constant non-negative " + "integer)", Op->getOperand(2)); break; } @@ -2200,7 +2205,13 @@ bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { void Verifier::verifyInlineAsmCall(const CallBase &Call) { const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); unsigned ArgNo = 0; + unsigned LabelNo = 0; for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + if (CI.Type == InlineAsm::isLabel) { + ++LabelNo; + continue; + } + // Only deal with constraints that correspond to call arguments. if (!CI.hasArg()) continue; @@ -2222,6 +2233,15 @@ void Verifier::verifyInlineAsmCall(const CallBase &Call) { ArgNo++; } + + if (auto *CallBr = dyn_cast<CallBrInst>(&Call)) { + Check(LabelNo == CallBr->getNumIndirectDests(), + "Number of label constraints does not match number of callbr dests", + &Call); + } else { + Check(LabelNo == 0, "Label constraints can only be used with callbr", + &Call); + } } /// Verify that statepoint intrinsic is well formed. 
@@ -2839,25 +2859,6 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI); const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand()); Check(!IA->canThrow(), "Unwinding from Callbr is not allowed"); - for (unsigned i = 0, e = CBI.getNumSuccessors(); i != e; ++i) - Check(CBI.getSuccessor(i)->getType()->isLabelTy(), - "Callbr successors must all have pointer type!", &CBI); - for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) { - Check(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)), - "Using an unescaped label as a callbr argument!", &CBI); - if (isa<BasicBlock>(CBI.getOperand(i))) - for (unsigned j = i + 1; j != e; ++j) - Check(CBI.getOperand(i) != CBI.getOperand(j), - "Duplicate callbr destination!", &CBI); - } - { - SmallPtrSet<BasicBlock *, 4> ArgBBs; - for (Value *V : CBI.args()) - if (auto *BA = dyn_cast<BlockAddress>(V)) - ArgBBs.insert(BA->getBasicBlock()); - for (BasicBlock *BB : CBI.getIndirectDests()) - Check(ArgBBs.count(BB), "Indirect label missing from arglist.", &CBI); - } verifyInlineAsmCall(CBI); visitTerminator(CBI); @@ -4489,6 +4490,55 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } +void Verifier::visitCallStackMetadata(MDNode *MD) { + // Call stack metadata should consist of a list of at least 1 constant int + // (representing a hash of the location). 
+ Check(MD->getNumOperands() >= 1, + "call stack metadata should have at least 1 operand", MD); + + for (const auto &Op : MD->operands()) + Check(mdconst::dyn_extract_or_null<ConstantInt>(Op), + "call stack metadata operand should be constant integer", Op); +} + +void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) { + Check(isa<CallBase>(I), "!memprof metadata should only exist on calls", &I); + Check(MD->getNumOperands() >= 1, + "!memprof annotations should have at least 1 metadata operand " + "(MemInfoBlock)", + MD); + + // Check each MIB + for (auto &MIBOp : MD->operands()) { + MDNode *MIB = dyn_cast<MDNode>(MIBOp); + // The first operand of an MIB should be the call stack metadata. + // There rest of the operands should be MDString tags, and there should be + // at least one. + Check(MIB->getNumOperands() >= 2, + "Each !memprof MemInfoBlock should have at least 2 operands", MIB); + + // Check call stack metadata (first operand). + Check(MIB->getOperand(0) != nullptr, + "!memprof MemInfoBlock first operand should not be null", MIB); + Check(isa<MDNode>(MIB->getOperand(0)), + "!memprof MemInfoBlock first operand should be an MDNode", MIB); + MDNode *StackMD = dyn_cast<MDNode>(MIB->getOperand(0)); + visitCallStackMetadata(StackMD); + + // Check that remaining operands are MDString. + Check(std::all_of(MIB->op_begin() + 1, MIB->op_end(), + [](const MDOperand &Op) { return isa<MDString>(Op); }), + "Not all !memprof MemInfoBlock operands 1 to N are MDString", MIB); + } +} + +void Verifier::visitCallsiteMetadata(Instruction &I, MDNode *MD) { + Check(isa<CallBase>(I), "!callsite metadata should only exist on calls", &I); + // Verify the partial callstack annotated from memprof profiles. This callsite + // is a part of a profiled allocation callstack. 
+ visitCallStackMetadata(MD); +} + void Verifier::visitAnnotationMetadata(MDNode *Annotation) { Check(isa<MDTuple>(Annotation), "annotation must be a tuple"); Check(Annotation->getNumOperands() >= 1, @@ -4735,6 +4785,12 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_prof)) visitProfMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_memprof)) + visitMemProfMetadata(I, MD); + + if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) + visitCallsiteMetadata(I, MD); + if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); @@ -5160,14 +5216,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { // In all other cases relocate should be tied to the statepoint directly. // This covers relocates on a normal return path of invoke statepoint and // relocates of a call statepoint. - auto Token = Call.getArgOperand(0); - Check(isa<GCStatepointInst>(Token), + auto *Token = Call.getArgOperand(0); + Check(isa<GCStatepointInst>(Token) || isa<UndefValue>(Token), "gc relocate is incorrectly tied to the statepoint", Call, Token); } // Verify rest of the relocate arguments. - const CallBase &StatepointCall = - *cast<GCRelocateInst>(Call).getStatepoint(); + const Value &StatepointCall = *cast<GCRelocateInst>(Call).getStatepoint(); // Both the base and derived must be piped through the safepoint. 
Value *Base = Call.getArgOperand(1); @@ -5182,7 +5237,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { const uint64_t DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue(); // Check the bounds - if (auto Opt = StatepointCall.getOperandBundle(LLVMContext::OB_gc_live)) { + if (isa<UndefValue>(StatepointCall)) + break; + if (auto Opt = cast<GCStatepointInst>(StatepointCall) + .getOperandBundle(LLVMContext::OB_gc_live)) { Check(BaseIndex < Opt->Inputs.size(), "gc.relocate: statepoint base index out of bounds", Call); Check(DerivedIndex < Opt->Inputs.size(), diff --git a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp index 9e89cce8312e..e31faf6422ed 100644 --- a/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp +++ b/contrib/llvm-project/llvm/lib/Linker/IRMover.cpp @@ -1273,14 +1273,19 @@ Error IRLinker::linkModuleFlagsMetadata() { // First build a map of the existing module flags and requirements. DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags; SmallSetVector<MDNode *, 16> Requirements; + SmallVector<unsigned, 0> Mins; + DenseSet<MDString *> SeenMin; for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = DstModFlags->getOperand(I); - ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0)); + uint64_t Behavior = + mdconst::extract<ConstantInt>(Op->getOperand(0))->getZExtValue(); MDString *ID = cast<MDString>(Op->getOperand(1)); - if (Behavior->getZExtValue() == Module::Require) { + if (Behavior == Module::Require) { Requirements.insert(cast<MDNode>(Op->getOperand(2))); } else { + if (Behavior == Module::Min) + Mins.push_back(I); Flags[ID] = std::make_pair(Op, I); } } @@ -1296,6 +1301,7 @@ Error IRLinker::linkModuleFlagsMetadata() { unsigned DstIndex; std::tie(DstOp, DstIndex) = Flags.lookup(ID); unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); + SeenMin.insert(ID); // If this is a requirement, add it and continue. 
if (SrcBehaviorValue == Module::Require) { @@ -1309,6 +1315,10 @@ Error IRLinker::linkModuleFlagsMetadata() { // If there is no existing flag with this ID, just add it. if (!DstOp) { + if (SrcBehaviorValue == Module::Min) { + Mins.push_back(DstModFlags->getNumOperands()); + SeenMin.erase(ID); + } Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); DstModFlags->addOperand(SrcOp); continue; @@ -1362,8 +1372,10 @@ Error IRLinker::linkModuleFlagsMetadata() { "Expected MDTuple when appending module flags"); if (DstValue->isDistinct()) return dyn_cast<MDTuple>(DstValue); + ArrayRef<MDOperand> DstOperands = DstValue->operands(); MDTuple *New = MDTuple::getDistinct( - DstM.getContext(), SmallVector<Metadata *, 4>(DstValue->operands())); + DstM.getContext(), + SmallVector<Metadata *, 4>(DstOperands.begin(), DstOperands.end())); Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; MDNode *Flag = MDTuple::getDistinct(DstM.getContext(), FlagOps); DstModFlags->setOperand(DstIndex, Flag); @@ -1465,6 +1477,20 @@ Error IRLinker::linkModuleFlagsMetadata() { } + // For the Min behavior, set the value to 0 if either module does not have the + // flag. + for (auto Idx : Mins) { + MDNode *Op = DstModFlags->getOperand(Idx); + MDString *ID = cast<MDString>(Op->getOperand(1)); + if (!SeenMin.count(ID)) { + ConstantInt *V = mdconst::extract<ConstantInt>(Op->getOperand(2)); + Metadata *FlagOps[] = { + Op->getOperand(0), ID, + ConstantAsMetadata::get(ConstantInt::get(V->getType(), 0))}; + DstModFlags->setOperand(Idx, MDNode::get(DstM.getContext(), FlagOps)); + } + } + // Check all of the requirements. 
for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { MDNode *Requirement = Requirements[I]; diff --git a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp index 78204ffe4c3b..0b4e9866d50a 100644 --- a/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/ELFObjectWriter.cpp @@ -144,9 +144,9 @@ struct ELFWriter { uint64_t align(unsigned Alignment); - bool maybeWriteCompression(uint64_t Size, + bool maybeWriteCompression(uint32_t ChType, uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, - bool ZLibStyle, unsigned Alignment); + unsigned Alignment); public: ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS, @@ -819,36 +819,25 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, // Include the debug info compression header. bool ELFWriter::maybeWriteCompression( - uint64_t Size, SmallVectorImpl<uint8_t> &CompressedContents, bool ZLibStyle, - unsigned Alignment) { - if (ZLibStyle) { - uint64_t HdrSize = - is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); - if (Size <= HdrSize + CompressedContents.size()) - return false; - // Platform specific header is followed by compressed data. - if (is64Bit()) { - // Write Elf64_Chdr header. - write(static_cast<ELF::Elf64_Word>(ELF::ELFCOMPRESS_ZLIB)); - write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field. - write(static_cast<ELF::Elf64_Xword>(Size)); - write(static_cast<ELF::Elf64_Xword>(Alignment)); - } else { - // Write Elf32_Chdr header otherwise. - write(static_cast<ELF::Elf32_Word>(ELF::ELFCOMPRESS_ZLIB)); - write(static_cast<ELF::Elf32_Word>(Size)); - write(static_cast<ELF::Elf32_Word>(Alignment)); - } - return true; - } - - // "ZLIB" followed by 8 bytes representing the uncompressed size of the section, - // useful for consumers to preallocate a buffer to decompress into. 
- const StringRef Magic = "ZLIB"; - if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size()) + uint32_t ChType, uint64_t Size, + SmallVectorImpl<uint8_t> &CompressedContents, unsigned Alignment) { + uint64_t HdrSize = + is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); + if (Size <= HdrSize + CompressedContents.size()) return false; - W.OS << Magic; - support::endian::write(W.OS, Size, support::big); + // Platform specific header is followed by compressed data. + if (is64Bit()) { + // Write Elf64_Chdr header. + write(static_cast<ELF::Elf64_Word>(ChType)); + write(static_cast<ELF::Elf64_Word>(0)); // ch_reserved field. + write(static_cast<ELF::Elf64_Xword>(Size)); + write(static_cast<ELF::Elf64_Xword>(Alignment)); + } else { + // Write Elf32_Chdr header otherwise. + write(static_cast<ELF::Elf32_Word>(ChType)); + write(static_cast<ELF::Elf32_Word>(Size)); + write(static_cast<ELF::Elf32_Word>(Alignment)); + } return true; } @@ -867,38 +856,31 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, return; } - assert((MAI->compressDebugSections() == DebugCompressionType::Z || - MAI->compressDebugSections() == DebugCompressionType::GNU) && - "expected zlib or zlib-gnu style compression"); + assert(MAI->compressDebugSections() == DebugCompressionType::Z && + "expected zlib style compression"); SmallVector<char, 128> UncompressedData; raw_svector_ostream VecOS(UncompressedData); Asm.writeSectionData(VecOS, &Section, Layout); - SmallVector<uint8_t, 128> CompressedContents; + SmallVector<uint8_t, 128> Compressed; + const uint32_t ChType = ELF::ELFCOMPRESS_ZLIB; compression::zlib::compress( makeArrayRef(reinterpret_cast<uint8_t *>(UncompressedData.data()), UncompressedData.size()), - CompressedContents); + Compressed); - bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; - if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, - ZlibStyle, Sec.getAlignment())) { + if 
(!maybeWriteCompression(ChType, UncompressedData.size(), Compressed, + Sec.getAlignment())) { W.OS << UncompressedData; return; } - if (ZlibStyle) { - // Set the compressed flag. That is zlib style. - Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); - // Alignment field should reflect the requirements of - // the compressed section header. - Section.setAlignment(is64Bit() ? Align(8) : Align(4)); - } else { - // Add "z" prefix to section name. This is zlib-gnu style. - MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); - } - W.OS << toStringRef(CompressedContents); + Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); + // Alignment field should reflect the requirements of + // the compressed section header. + Section.setAlignment(is64Bit() ? Align(8) : Align(4)); + W.OS << toStringRef(Compressed); } void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp index d312e3521c9e..322ed8e23eb6 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp @@ -468,24 +468,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section, return R.first->second; } -void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) { - StringRef GroupName; - if (const MCSymbol *Group = Section->getGroup()) - GroupName = Group->getName(); - - // This function is only used by .debug*, which should not have the - // SHF_LINK_ORDER flag. 
- unsigned UniqueID = Section->getUniqueID(); - ELFUniquingMap.erase( - ELFSectionKey{Section->getName(), GroupName, "", UniqueID}); - auto I = ELFUniquingMap - .insert(std::make_pair( - ELFSectionKey{Name, GroupName, "", UniqueID}, Section)) - .first; - StringRef CachedName = I->first.SectionName; - const_cast<MCSectionELF *>(Section)->setSectionName(CachedName); -} - MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, unsigned Flags, SectionKind K, unsigned EntrySize, diff --git a/contrib/llvm-project/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCMachOStreamer.cpp index 9f22b9b0a866..f358f593ff39 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCMachOStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCMachOStreamer.cpp @@ -583,15 +583,27 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, return S; } -// Create the AddrSig section and first data fragment here as its layout needs -// to be computed immediately after in order for it to be exported correctly. +// The AddrSig section uses a series of relocations to refer to the symbols that +// should be considered address-significant. The only interesting content of +// these relocations is their symbol; the type, length etc will be ignored by +// the linker. The reason we are not referring to the symbol indices directly is +// that those indices will be invalidated by tools that update the symbol table. +// Symbol relocations OTOH will have their indices updated by e.g. llvm-strip. void MCMachOStreamer::createAddrSigSection() { MCAssembler &Asm = getAssembler(); MCObjectWriter &writer = Asm.getWriter(); if (!writer.getEmitAddrsigSection()) return; + // Create the AddrSig section and first data fragment here as its layout needs + // to be computed immediately after in order for it to be exported correctly. 
MCSection *AddrSigSection = Asm.getContext().getObjectFileInfo()->getAddrSigSection(); Asm.registerSection(*AddrSigSection); - new MCDataFragment(AddrSigSection); + auto *Frag = new MCDataFragment(AddrSigSection); + // We will generate a series of pointer-sized symbol relocations at offset + // 0x0. Set the section size to be large enough to contain a single pointer + // (instead of emitting a zero-sized section) so these relocations are + // technically valid, even though we don't expect these relocations to + // actually be applied by the linker. + Frag->getContents().resize(8); } diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp index 694ea395fdec..af80d8327210 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4238,11 +4238,8 @@ bool MasmParser::parseStructInitializer(const StructInfo &Structure, } } // Default-initialize all remaining fields. - for (auto It = Structure.Fields.begin() + FieldIndex; - It != Structure.Fields.end(); ++It) { - const FieldInfo &Field = *It; + for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex)) FieldInitializers.push_back(Field.Contents); - } if (EndToken) { if (EndToken.value() == AsmToken::Greater) @@ -4350,9 +4347,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, return true; } // Default-initialize all remaining values. - for (auto it = Contents.Values.begin() + Initializer.Values.size(); - it != Contents.Values.end(); ++it) { - const auto &Value = *it; + for (const auto &Value : + llvm::drop_begin(Contents.Values, Initializer.Values.size())) { if (emitIntValue(Value, Field.Type)) return true; } @@ -4367,9 +4363,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, AsInt.getBitWidth() / 8); } // Default-initialize all remaining values. 
- for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size(); - It != Contents.AsIntValues.end(); ++It) { - const auto &AsInt = *It; + for (const auto &AsInt : + llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) { getStreamer().emitIntValue(AsInt.getLimitedValue(), AsInt.getBitWidth() / 8); } @@ -4384,10 +4379,8 @@ bool MasmParser::emitFieldInitializer(const FieldInfo &Field, return true; } // Default-initialize all remaining values. - for (auto It = - Contents.Initializers.begin() + Initializer.Initializers.size(); - It != Contents.Initializers.end(); ++It) { - const auto &Init = *It; + for (const auto &Init : llvm::drop_begin(Contents.Initializers, + Initializer.Initializers.size())) { if (emitStructInitializer(Contents.Structure, Init)) return true; } @@ -4425,10 +4418,8 @@ bool MasmParser::emitStructInitializer(const StructInfo &Structure, return true; } // Default-initialize all remaining fields. - for (auto It = - Structure.Fields.begin() + Initializer.FieldInitializers.size(); - It != Structure.Fields.end(); ++It) { - const auto &Field = *It; + for (const auto &Field : llvm::drop_begin( + Structure.Fields, Initializer.FieldInitializers.size())) { getStreamer().emitZeros(Field.Offset - Offset); Offset = Field.Offset + Field.SizeOf; if (emitFieldValue(Field)) @@ -4649,10 +4640,8 @@ bool MasmParser::parseDirectiveNestedEnds() { if (ParentStruct.IsUnion) { ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); } else { - for (auto FieldIter = ParentStruct.Fields.begin() + OldFields; - FieldIter != ParentStruct.Fields.end(); ++FieldIter) { - FieldIter->Offset += FirstFieldOffset; - } + for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields)) + Field.Offset += FirstFieldOffset; const unsigned StructureEnd = FirstFieldOffset + Structure.Size; if (!ParentStruct.IsUnion) { diff --git a/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp index 
5277ce87bee0..fdf8bbbe0a4d 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCPseudoProbe.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCPseudoProbe.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -519,7 +520,7 @@ void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) { std::vector<uint64_t> Addresses; for (auto Entry : Address2ProbesMap) Addresses.push_back(Entry.first); - std::sort(Addresses.begin(), Addresses.end()); + llvm::sort(Addresses); for (auto K : Addresses) { OS << "Address:\t"; OS << K; diff --git a/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp index 78d0d9cec556..038433cb24fa 100644 --- a/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MachObjectWriter.cpp @@ -753,32 +753,27 @@ static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { llvm_unreachable("Invalid mc version min type"); } -// Encode addrsig data as symbol indexes in variable length encoding. -void MachObjectWriter::writeAddrsigSection(MCAssembler &Asm) { +void MachObjectWriter::populateAddrSigSection(MCAssembler &Asm) { MCSection *AddrSigSection = Asm.getContext().getObjectFileInfo()->getAddrSigSection(); - MCSection::FragmentListType &fragmentList = AddrSigSection->getFragmentList(); - if (!fragmentList.size()) - return; - - assert(fragmentList.size() == 1); - MCFragment *pFragment = &*fragmentList.begin(); - MCDataFragment *pDataFragment = dyn_cast_or_null<MCDataFragment>(pFragment); - assert(pDataFragment); - - raw_svector_ostream OS(pDataFragment->getContents()); - for (const MCSymbol *sym : this->getAddrsigSyms()) - encodeULEB128(sym->getIndex(), OS); + unsigned Log2Size = is64Bit() ? 
3 : 2; + for (const MCSymbol *S : getAddrsigSyms()) { + MachO::any_relocation_info MRE; + MRE.r_word0 = 0; + MRE.r_word1 = (Log2Size << 25) | (MachO::GENERIC_RELOC_VANILLA << 28); + addRelocation(S, AddrSigSection, MRE); + } } uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartOffset = W.OS.tell(); + populateAddrSigSection(Asm); + // Compute symbol table information and bind symbol indices. computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData, UndefinedSymbolData); - writeAddrsigSection(Asm); if (!Asm.CGProfile.empty()) { MCSection *CGProfileSection = Asm.getContext().getMachOSection( diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp index 33e496b7a864..809ac37c3442 100644 --- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -169,6 +169,7 @@ public: Strings.clear(); SectionMap.clear(); SymbolMap.clear(); + WeakDefaults.clear(); MCObjectWriter::reset(); } diff --git a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp index 977e77bf67fd..d46ae2247535 100644 --- a/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -206,6 +206,7 @@ class XCOFFObjectWriter : public MCObjectWriter { uint16_t SectionCount = 0; uint64_t RelocationEntryOffset = 0; std::vector<std::pair<std::string, size_t>> FileNames; + bool HasVisibility = false; support::endian::Writer W; std::unique_ptr<MCXCOFFObjectTargetWriter> TargetObjectWriter; @@ -275,6 +276,7 @@ class XCOFFObjectWriter : public MCObjectWriter { void writeSymbolEntryForDwarfSection(const XCOFFSection &DwarfSectionRef, int16_t SectionIndex); void writeFileHeader(); + void writeAuxFileHeader(); void writeSectionHeaderTable(); void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout); 
void writeSectionForControlSectionEntry(const MCAssembler &Asm, @@ -308,14 +310,9 @@ class XCOFFObjectWriter : public MCObjectWriter { void assignAddressesAndIndices(const MCAsmLayout &); void finalizeSectionInfo(); - // TODO aux header support not implemented. - bool needsAuxiliaryHeader() const { return false; } - - // Returns the size of the auxiliary header to be written to the object file. size_t auxiliaryHeaderSize() const { - assert(!needsAuxiliaryHeader() && - "Auxiliary header support not implemented."); - return 0; + // 64-bit object files have no auxiliary header. + return HasVisibility && !is64Bit() ? XCOFF::AuxFileHeaderSizeShort : 0; } public: @@ -468,6 +465,9 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(&S); const MCSectionXCOFF *ContainingCsect = getContainingCsect(XSym); + if (XSym->getVisibilityType() != XCOFF::SYM_V_UNSPECIFIED) + HasVisibility = true; + if (ContainingCsect->getCSectType() == XCOFF::XTY_ER) { // Handle undefined symbol. UndefinedCsects.emplace_back(ContainingCsect); @@ -648,6 +648,7 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm, uint64_t StartOffset = W.OS.tell(); writeFileHeader(); + writeAuxFileHeader(); writeSectionHeaderTable(); writeSections(Asm, Layout); writeRelocations(); @@ -688,12 +689,6 @@ void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, W.write<uint32_t>(Value); } W.write<int16_t>(SectionNumber); - // Basic/Derived type. See the description of the n_type field for symbol - // table entries for a detailed description. Since we don't yet support - // visibility, and all other bits are either optionally set or reserved, this - // is always zero. - if (SymbolType != 0) - report_fatal_error("Emitting non-zero visibilities is not supported yet."); // TODO Set the function indicator (bit 10, 0x0020) for functions // when debugging is enabled. 
W.write<uint16_t>(SymbolType); @@ -773,18 +768,32 @@ void XCOFFObjectWriter::writeFileHeader() { W.write<int32_t>(0); // TimeStamp writeWord(SymbolTableOffset); if (is64Bit()) { - W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object - // file that is not to be loaded. + W.write<uint16_t>(auxiliaryHeaderSize()); W.write<uint16_t>(0); // Flags W.write<int32_t>(SymbolTableEntryCount); } else { W.write<int32_t>(SymbolTableEntryCount); - W.write<uint16_t>(0); // AuxHeaderSize. No optional header for an object - // file that is not to be loaded. + W.write<uint16_t>(auxiliaryHeaderSize()); W.write<uint16_t>(0); // Flags } } +void XCOFFObjectWriter::writeAuxFileHeader() { + if (!auxiliaryHeaderSize()) + return; + W.write<uint16_t>(0); // Magic + W.write<uint16_t>( + XCOFF::NEW_XCOFF_INTERPRET); // Version. The new interpretation of the + // n_type field in the symbol table entry is + // used in XCOFF32. + W.write<uint32_t>(Sections[0]->Size); // TextSize + W.write<uint32_t>(Sections[1]->Size); // InitDataSize + W.write<uint32_t>(Sections[2]->Size); // BssDataSize + W.write<uint32_t>(0); // EntryPointAddr + W.write<uint32_t>(Sections[0]->Address); // TextStartAddr + W.write<uint32_t>(Sections[1]->Address); // DataStartAddr +} + void XCOFFObjectWriter::writeSectionHeaderTable() { auto writeSectionHeader = [&](const SectionEntry *Sec, bool IsDwarf) { // Nothing to write for this Section. 
diff --git a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp index f0e4f91cd347..8b44c09023f1 100644 --- a/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp +++ b/contrib/llvm-project/llvm/lib/ObjCopy/ELF/ELFObject.cpp @@ -523,9 +523,6 @@ Error ELFSectionWriter<ELFT>::visit(const CompressedSection &Sec) { case DebugCompressionType::None: std::copy(Sec.OriginalData.begin(), Sec.OriginalData.end(), Buf); return Error::success(); - case DebugCompressionType::GNU: - llvm_unreachable("unexpected zlib-gnu"); - break; case DebugCompressionType::Z: Chdr.ch_type = ELF::ELFCOMPRESS_ZLIB; break; diff --git a/contrib/llvm-project/llvm/lib/Object/Archive.cpp b/contrib/llvm-project/llvm/lib/Object/Archive.cpp index ad03f9cae9f8..1dffe007b9a9 100644 --- a/contrib/llvm-project/llvm/lib/Object/Archive.cpp +++ b/contrib/llvm-project/llvm/lib/Object/Archive.cpp @@ -965,14 +965,15 @@ StringRef Archive::Symbol::getName() const { Expected<Archive::Child> Archive::Symbol::getMember() const { const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; - if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) + if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64 || + Parent->kind() == K_AIXBIG) Offsets += sizeof(uint64_t); else Offsets += sizeof(uint32_t); uint64_t Offset = 0; if (Parent->kind() == K_GNU) { Offset = read32be(Offsets + SymbolIndex * 4); - } else if (Parent->kind() == K_GNU64) { + } else if (Parent->kind() == K_GNU64 || Parent->kind() == K_AIXBIG) { Offset = read64be(Offsets + SymbolIndex * 8); } else if (Parent->kind() == K_BSD) { // The SymbolIndex is an index into the ranlib structs that start at @@ -1105,6 +1106,8 @@ Archive::symbol_iterator Archive::symbol_begin() const { // Skip the byte count of the string table. 
buf += sizeof(uint64_t); buf += ran_strx; + } else if (kind() == K_AIXBIG) { + buf = getStringTable().begin(); } else { uint32_t member_count = 0; uint32_t symbol_count = 0; @@ -1127,7 +1130,7 @@ uint32_t Archive::getNumberOfSymbols() const { const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); - if (kind() == K_GNU64) + if (kind() == K_GNU64 || kind() == K_AIXBIG) return read64be(buf); if (kind() == K_BSD) return read32le(buf) / 8; @@ -1180,6 +1183,58 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) Err = malformedError("malformed AIX big archive: last member offset \"" + RawOffset + "\" is not a number"); + // Calculate the global symbol table. + uint64_t GlobSymOffset = 0; + RawOffset = getFieldRawString(ArFixLenHdr->GlobSymOffset); + if (RawOffset.getAsInteger(10, GlobSymOffset)) + // TODO: add test case. + Err = malformedError( + "malformed AIX big archive: global symbol table offset \"" + RawOffset + + "\" is not a number"); + + if (Err) + return; + + if (GlobSymOffset > 0) { + uint64_t BufferSize = Data.getBufferSize(); + uint64_t GlobalSymTblContentOffset = + GlobSymOffset + sizeof(BigArMemHdrType); + if (GlobalSymTblContentOffset > BufferSize) { + Err = malformedError("global symbol table header at offset 0x" + + Twine::utohexstr(GlobSymOffset) + " and size 0x" + + Twine::utohexstr(sizeof(BigArMemHdrType)) + + " goes past the end of file"); + return; + } + + const char *GlobSymTblLoc = Data.getBufferStart() + GlobSymOffset; + const BigArMemHdrType *GlobalSymHdr = + reinterpret_cast<const BigArMemHdrType *>(GlobSymTblLoc); + RawOffset = getFieldRawString(GlobalSymHdr->Size); + uint64_t Size; + if (RawOffset.getAsInteger(10, Size)) { + // TODO: add test case. 
+ Err = malformedError( + "malformed AIX big archive: global symbol table size \"" + RawOffset + + "\" is not a number"); + return; + } + if (GlobalSymTblContentOffset + Size > BufferSize) { + Err = malformedError("global symbol table content at offset 0x" + + Twine::utohexstr(GlobalSymTblContentOffset) + + " and size 0x" + Twine::utohexstr(Size) + + " goes past the end of file"); + return; + } + SymbolTable = StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType), Size); + unsigned SymNum = getNumberOfSymbols(); + unsigned SymOffsetsSize = 8 * (SymNum + 1); + uint64_t SymbolTableStringSize = Size - SymOffsetsSize; + StringTable = + StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType) + SymOffsetsSize, + SymbolTableStringSize); + } + child_iterator I = child_begin(Err, false); if (Err) return; diff --git a/contrib/llvm-project/llvm/lib/Object/Decompressor.cpp b/contrib/llvm-project/llvm/lib/Object/Decompressor.cpp index a6a28a0589ac..3842ec92ccfc 100644 --- a/contrib/llvm-project/llvm/lib/Object/Decompressor.cpp +++ b/contrib/llvm-project/llvm/lib/Object/Decompressor.cpp @@ -23,9 +23,7 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, return createError("zlib is not available"); Decompressor D(Data); - Error Err = isGnuStyle(Name) ? D.consumeCompressedGnuHeader() - : D.consumeCompressedZLibHeader(Is64Bit, IsLE); - if (Err) + if (Error Err = D.consumeCompressedZLibHeader(Is64Bit, IsLE)) return std::move(Err); return D; } @@ -33,21 +31,6 @@ Expected<Decompressor> Decompressor::create(StringRef Name, StringRef Data, Decompressor::Decompressor(StringRef Data) : SectionData(Data), DecompressedSize(0) {} -Error Decompressor::consumeCompressedGnuHeader() { - if (!SectionData.startswith("ZLIB")) - return createError("corrupted compressed section header"); - - SectionData = SectionData.substr(4); - - // Consume uncompressed section size (big-endian 8 bytes). 
- if (SectionData.size() < 8) - return createError("corrupted uncompressed section size"); - DecompressedSize = read64be(SectionData.data()); - SectionData = SectionData.substr(8); - - return Error::success(); -} - Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, bool IsLittleEndian) { using namespace ELF; @@ -72,26 +55,6 @@ Error Decompressor::consumeCompressedZLibHeader(bool Is64Bit, return Error::success(); } -bool Decompressor::isGnuStyle(StringRef Name) { - return Name.startswith(".zdebug"); -} - -bool Decompressor::isCompressed(const object::SectionRef &Section) { - if (Section.isCompressed()) - return true; - - Expected<StringRef> SecNameOrErr = Section.getName(); - if (SecNameOrErr) - return isGnuStyle(*SecNameOrErr); - - consumeError(SecNameOrErr.takeError()); - return false; -} - -bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { - return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); -} - Error Decompressor::decompress(MutableArrayRef<uint8_t> Buffer) { size_t Size = Buffer.size(); return compression::zlib::uncompress(arrayRefFromStringRef(SectionData), diff --git a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp index ce816b097691..d00359c6deef 100644 --- a/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm-project/llvm/lib/Object/WasmObjectFile.cpp @@ -204,7 +204,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, if (Expr.Extended) { Ctx.Ptr = Start; - while (1) { + while (true) { uint8_t Opcode = readOpcode(Ctx); switch (Opcode) { case wasm::WASM_OPCODE_I32_CONST: diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp index 593243144f01..3b3eefcc29ca 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -172,7 +172,7 @@ static cl::opt<bool> 
EnableEagerlyInvalidateAnalyses( cl::desc("Eagerly invalidate more analyses in default pipelines")); static cl::opt<bool> EnableNoRerunSimplificationPipeline( - "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, + "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden, cl::desc( "Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp index bad8184dffcf..baea0eb53ef9 100644 --- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp @@ -81,36 +81,35 @@ cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG( // facilities, the error message will be shown in place of the expected output. // enum class ChangePrinter { - NoChangePrinter, - PrintChangedVerbose, - PrintChangedQuiet, - PrintChangedDiffVerbose, - PrintChangedDiffQuiet, - PrintChangedColourDiffVerbose, - PrintChangedColourDiffQuiet, - PrintChangedDotCfgVerbose, - PrintChangedDotCfgQuiet + None, + Verbose, + Quiet, + DiffVerbose, + DiffQuiet, + ColourDiffVerbose, + ColourDiffQuiet, + DotCfgVerbose, + DotCfgQuiet, }; static cl::opt<ChangePrinter> PrintChanged( "print-changed", cl::desc("Print changed IRs"), cl::Hidden, - cl::ValueOptional, cl::init(ChangePrinter::NoChangePrinter), + cl::ValueOptional, cl::init(ChangePrinter::None), cl::values( - clEnumValN(ChangePrinter::PrintChangedQuiet, "quiet", - "Run in quiet mode"), - clEnumValN(ChangePrinter::PrintChangedDiffVerbose, "diff", + clEnumValN(ChangePrinter::Quiet, "quiet", "Run in quiet mode"), + clEnumValN(ChangePrinter::DiffVerbose, "diff", "Display patch-like changes"), - clEnumValN(ChangePrinter::PrintChangedDiffQuiet, "diff-quiet", + clEnumValN(ChangePrinter::DiffQuiet, "diff-quiet", "Display 
patch-like changes in quiet mode"), - clEnumValN(ChangePrinter::PrintChangedColourDiffVerbose, "cdiff", + clEnumValN(ChangePrinter::ColourDiffVerbose, "cdiff", "Display patch-like changes with color"), - clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet", + clEnumValN(ChangePrinter::ColourDiffQuiet, "cdiff-quiet", "Display patch-like changes in quiet mode with color"), - clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg", + clEnumValN(ChangePrinter::DotCfgVerbose, "dot-cfg", "Create a website with graphical changes"), - clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet", + clEnumValN(ChangePrinter::DotCfgQuiet, "dot-cfg-quiet", "Create a website with graphical changes in quiet mode"), // Sentinel value for unspecified option. - clEnumValN(ChangePrinter::PrintChangedVerbose, "", ""))); + clEnumValN(ChangePrinter::Verbose, "", ""))); // An option that supports the -print-changed option. See // the description for -print-changed for an explanation of the use @@ -596,8 +595,8 @@ void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) { IRChangedPrinter::~IRChangedPrinter() = default; void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { - if (PrintChanged == ChangePrinter::PrintChangedVerbose || - PrintChanged == ChangePrinter::PrintChangedQuiet) + if (PrintChanged == ChangePrinter::Verbose || + PrintChanged == ChangePrinter::Quiet) TextChangeReporter<std::string>::registerRequiredCallbacks(PIC); } @@ -940,7 +939,22 @@ void PrintPassInstrumentation::registerCallbacks( if (isSpecialPass(PassID, SpecialPasses)) return; - print() << "Running pass: " << PassID << " on " << getIRName(IR) << "\n"; + auto &OS = print(); + OS << "Running pass: " << PassID << " on " << getIRName(IR); + if (any_isa<const Function *>(IR)) { + unsigned Count = any_cast<const Function *>(IR)->getInstructionCount(); + OS << " (" << Count << " instruction"; + if (Count != 1) + OS << 's'; + OS << ')'; + } else 
if (any_isa<const LazyCallGraph::SCC *>(IR)) { + int Count = any_cast<const LazyCallGraph::SCC *>(IR)->size(); + OS << " (" << Count << " node"; + if (Count != 1) + OS << 's'; + OS << ')'; + } + OS << "\n"; Indent += 2; }); PIC.registerAfterPassCallback( @@ -1260,10 +1274,10 @@ void InLineChangePrinter::handleFunctionCompare( } void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { - if (PrintChanged == ChangePrinter::PrintChangedDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedDiffQuiet || - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet) + if (PrintChanged == ChangePrinter::DiffVerbose || + PrintChanged == ChangePrinter::DiffQuiet || + PrintChanged == ChangePrinter::ColourDiffVerbose || + PrintChanged == ChangePrinter::ColourDiffQuiet) TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC); } @@ -2096,8 +2110,8 @@ DotCfgChangeReporter::~DotCfgChangeReporter() { void DotCfgChangeReporter::registerCallbacks( PassInstrumentationCallbacks &PIC) { - if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose || - PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) { + if (PrintChanged == ChangePrinter::DotCfgVerbose || + PrintChanged == ChangePrinter::DotCfgQuiet) { SmallString<128> OutputDir; sys::fs::expand_tilde(DotCfgDir, OutputDir); sys::fs::make_absolute(OutputDir); @@ -2114,14 +2128,12 @@ void DotCfgChangeReporter::registerCallbacks( StandardInstrumentations::StandardInstrumentations( bool DebugLogging, bool VerifyEach, PrintPassOptions PrintPassOpts) : PrintPass(DebugLogging, PrintPassOpts), OptNone(DebugLogging), - PrintChangedIR(PrintChanged == ChangePrinter::PrintChangedVerbose), - PrintChangedDiff( - PrintChanged == ChangePrinter::PrintChangedDiffVerbose || - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose, - PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose || - PrintChanged == 
ChangePrinter::PrintChangedColourDiffQuiet), - WebsiteChangeReporter(PrintChanged == - ChangePrinter::PrintChangedDotCfgVerbose), + PrintChangedIR(PrintChanged == ChangePrinter::Verbose), + PrintChangedDiff(PrintChanged == ChangePrinter::DiffVerbose || + PrintChanged == ChangePrinter::ColourDiffVerbose, + PrintChanged == ChangePrinter::ColourDiffVerbose || + PrintChanged == ChangePrinter::ColourDiffQuiet), + WebsiteChangeReporter(PrintChanged == ChangePrinter::DotCfgVerbose), Verify(DebugLogging), VerifyEach(VerifyEach) {} PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter = diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index f4f13bafb233..8c1eadbe8271 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -349,7 +349,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, StringRef CompilationDir) { auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename); if (Error E = ProfileReaderOrErr.takeError()) - return std::move(E); + return createFileError(ProfileFilename, std::move(E)); auto ProfileReader = std::move(ProfileReaderOrErr.get()); auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping()); bool DataFound = false; @@ -358,7 +358,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN( File.value(), /*IsText=*/false, /*RequiresNullTerminator=*/false); if (std::error_code EC = CovMappingBufOrErr.getError()) - return errorCodeToError(EC); + return createFileError(File.value(), errorCodeToError(EC)); StringRef Arch = Arches.empty() ? 
StringRef() : Arches[File.index()]; MemoryBufferRef CovMappingBufRef = CovMappingBufOrErr.get()->getMemBufferRef(); @@ -368,7 +368,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, if (Error E = CoverageReadersOrErr.takeError()) { E = handleMaybeNoDataFoundError(std::move(E)); if (E) - return std::move(E); + return createFileError(File.value(), std::move(E)); // E == success (originally a no_data_found error). continue; } @@ -378,12 +378,14 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames, Readers.push_back(std::move(Reader)); DataFound |= !Readers.empty(); if (Error E = loadFromReaders(Readers, *ProfileReader, *Coverage)) - return std::move(E); + return createFileError(File.value(), std::move(E)); } // If no readers were created, either no objects were provided or none of them // had coverage data. Return an error in the latter case. if (!DataFound && !ObjectFilenames.empty()) - return make_error<CoverageMapError>(coveragemap_error::no_data_found); + return createFileError( + join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "), + make_error<CoverageMapError>(coveragemap_error::no_data_found)); return std::move(Coverage); } diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp index ee8989979a26..23804ce604c4 100644 --- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1026,20 +1026,50 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { return *Symtab; } -Expected<InstrProfRecord> -IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, - uint64_t FuncHash) { +Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord( + StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) { ArrayRef<NamedInstrProfRecord> Data; + uint64_t FuncSum = 0; Error Err = Remapper->getRecords(FuncName, Data); if (Err) return std::move(Err); // Found it. 
Look for counters with the right hash. + + // A flag to indicate if the records are from the same type + // of profile (i.e cs vs nocs). + bool CSBitMatch = false; + auto getFuncSum = [](const std::vector<uint64_t> &Counts) { + uint64_t ValueSum = 0; + for (unsigned I = 0, S = Counts.size(); I < S; I++) { + uint64_t CountValue = Counts[I]; + if (CountValue == (uint64_t)-1) + continue; + // Handle overflow -- if that happens, return max. + if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum) + return std::numeric_limits<uint64_t>::max(); + ValueSum += CountValue; + } + return ValueSum; + }; + for (const NamedInstrProfRecord &I : Data) { // Check for a match and fill the vector if there is one. if (I.Hash == FuncHash) return std::move(I); + if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) == + NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) { + CSBitMatch = true; + if (MismatchedFuncSum == nullptr) + continue; + FuncSum = std::max(FuncSum, getFuncSum(I.Counts)); + } + } + if (CSBitMatch) { + if (MismatchedFuncSum != nullptr) + *MismatchedFuncSum = FuncSum; + return error(instrprof_error::hash_mismatch); } - return error(instrprof_error::hash_mismatch); + return error(instrprof_error::unknown_function); } Expected<memprof::MemProfRecord> diff --git a/contrib/llvm-project/llvm/lib/Support/AddressRanges.cpp b/contrib/llvm-project/llvm/lib/Support/AddressRanges.cpp index 5ba011bac4e9..187d5be00dae 100644 --- a/contrib/llvm-project/llvm/lib/Support/AddressRanges.cpp +++ b/contrib/llvm-project/llvm/lib/Support/AddressRanges.cpp @@ -12,48 +12,59 @@ using namespace llvm; -void AddressRanges::insert(AddressRange Range) { +AddressRanges::Collection::const_iterator +AddressRanges::insert(AddressRange Range) { if (Range.size() == 0) - return; + return Ranges.end(); auto It = llvm::upper_bound(Ranges, Range); auto It2 = It; - while (It2 != Ranges.end() && It2->start() < Range.end()) + while (It2 != Ranges.end() && It2->start() <= Range.end()) ++It2; if (It != It2) 
{ - Range = {Range.start(), std::max(Range.end(), It2[-1].end())}; + Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())}; It = Ranges.erase(It, It2); } - if (It != Ranges.begin() && Range.start() < It[-1].end()) - It[-1] = {It[-1].start(), std::max(It[-1].end(), Range.end())}; - else - Ranges.insert(It, Range); + if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) { + --It; + *It = {It->start(), std::max(It->end(), Range.end())}; + return It; + } + + return Ranges.insert(It, Range); } -bool AddressRanges::contains(uint64_t Addr) const { +AddressRanges::Collection::const_iterator +AddressRanges::find(uint64_t Addr) const { auto It = std::partition_point( Ranges.begin(), Ranges.end(), [=](const AddressRange &R) { return R.start() <= Addr; }); - return It != Ranges.begin() && Addr < It[-1].end(); + + if (It == Ranges.begin()) + return Ranges.end(); + + --It; + if (Addr >= It->end()) + return Ranges.end(); + + return It; } -bool AddressRanges::contains(AddressRange Range) const { +AddressRanges::Collection::const_iterator +AddressRanges::find(AddressRange Range) const { if (Range.size() == 0) - return false; + return Ranges.end(); + auto It = std::partition_point( Ranges.begin(), Ranges.end(), [=](const AddressRange &R) { return R.start() <= Range.start(); }); + if (It == Ranges.begin()) - return false; - return Range.end() <= It[-1].end(); -} + return Ranges.end(); -Optional<AddressRange> -AddressRanges::getRangeThatContains(uint64_t Addr) const { - auto It = std::partition_point( - Ranges.begin(), Ranges.end(), - [=](const AddressRange &R) { return R.start() <= Addr; }); - if (It != Ranges.begin() && Addr < It[-1].end()) - return It[-1]; - return llvm::None; + --It; + if (Range.end() > It->end()) + return Ranges.end(); + + return It; } diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp index e3df172ef113..5e7d63165130 100644 --- 
a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp @@ -2382,7 +2382,7 @@ protected: for (size_t I = 0, E = Opts.size(); I != E; ++I) { Option *Opt = Opts[I].second; for (auto &Cat : Opt->Categories) { - assert(find(SortedCategories, Cat) != SortedCategories.end() && + assert(llvm::is_contained(SortedCategories, Cat) && "Option has an unregistered category"); CategorizedOptions[Cat].push_back(Opt); } diff --git a/contrib/llvm-project/llvm/lib/Support/Compression.cpp b/contrib/llvm-project/llvm/lib/Support/Compression.cpp index 21191972fb8b..e8fb715aa770 100644 --- a/contrib/llvm-project/llvm/lib/Support/Compression.cpp +++ b/contrib/llvm-project/llvm/lib/Support/Compression.cpp @@ -20,6 +20,9 @@ #if LLVM_ENABLE_ZLIB #include <zlib.h> #endif +#if LLVM_ENABLE_ZSTD +#include <zstd.h> +#endif using namespace llvm; using namespace llvm::compression; @@ -100,3 +103,65 @@ Error zlib::uncompress(ArrayRef<uint8_t> Input, llvm_unreachable("zlib::uncompress is unavailable"); } #endif + +#if LLVM_ENABLE_ZSTD + +bool zstd::isAvailable() { return true; } + +void zstd::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { + unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size()); + CompressedBuffer.resize_for_overwrite(CompressedBufferSize); + unsigned long CompressedSize = + ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize, + (const char *)Input.data(), Input.size(), Level); + if (ZSTD_isError(CompressedSize)) + report_bad_alloc_error("Allocation failed"); + // Tell MemorySanitizer that zstd output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. 
+ __msan_unpoison(CompressedBuffer.data(), CompressedSize); + if (CompressedSize < CompressedBuffer.size()) + CompressedBuffer.truncate(CompressedSize); +} + +Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + const size_t Res = + ::ZSTD_decompress(UncompressedBuffer, UncompressedSize, + (const uint8_t *)Input.data(), Input.size()); + UncompressedSize = Res; + // Tell MemorySanitizer that zstd output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented ZLib. + __msan_unpoison(UncompressedBuffer, UncompressedSize); + return ZSTD_isError(Res) ? make_error<StringError>(ZSTD_getErrorName(Res), + inconvertibleErrorCode()) + : Error::success(); +} + +Error zstd::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize) { + UncompressedBuffer.resize_for_overwrite(UncompressedSize); + Error E = + zstd::uncompress(Input, UncompressedBuffer.data(), UncompressedSize); + if (UncompressedSize < UncompressedBuffer.size()) + UncompressedBuffer.truncate(UncompressedSize); + return E; +} + +#else +bool zstd::isAvailable() { return false; } +void zstd::compress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) { + llvm_unreachable("zstd::compress is unavailable"); +} +Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +Error zstd::uncompress(ArrayRef<uint8_t> Input, + SmallVectorImpl<uint8_t> &UncompressedBuffer, + size_t UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +#endif diff --git a/contrib/llvm-project/llvm/lib/Support/DivisionByConstantInfo.cpp b/contrib/llvm-project/llvm/lib/Support/DivisionByConstantInfo.cpp index 69f39386798c..35486674e02f 100644 --- a/contrib/llvm-project/llvm/lib/Support/DivisionByConstantInfo.cpp +++ 
b/contrib/llvm-project/llvm/lib/Support/DivisionByConstantInfo.cpp @@ -1,4 +1,4 @@ -//===----- DivisonByConstantInfo.cpp - division by constant -*- C++ -*-----===// +//===----- DivisionByConstantInfo.cpp - division by constant -*- C++ -*----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -62,11 +62,11 @@ SignedDivisionByConstantInfo SignedDivisionByConstantInfo::get(const APInt &D) { /// S. Warren, Jr., chapter 10. /// LeadingZeros can be used to simplify the calculation if the upper bits /// of the divided value are known zero. -UnsignedDivisonByConstantInfo -UnsignedDivisonByConstantInfo::get(const APInt &D, unsigned LeadingZeros) { +UnsignedDivisionByConstantInfo +UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros) { unsigned P; APInt NC, Delta, Q1, R1, Q2, R2; - struct UnsignedDivisonByConstantInfo Retval; + struct UnsignedDivisionByConstantInfo Retval; Retval.IsAdd = false; // initialize "add" indicator APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros); APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth()); diff --git a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp index 7fe04af4696b..0fe286d239d4 100644 --- a/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Support/RISCVISAInfo.cpp @@ -77,6 +77,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zkt", RISCVExtensionVersion{1, 0}}, {"zk", RISCVExtensionVersion{1, 0}}, + {"zmmul", RISCVExtensionVersion{1, 0}}, + {"v", RISCVExtensionVersion{1, 0}}, {"zvl32b", RISCVExtensionVersion{1, 0}}, {"zvl64b", RISCVExtensionVersion{1, 0}}, diff --git a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp index e38903910275..6dc466e29df3 100644 --- 
a/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp +++ b/contrib/llvm-project/llvm/lib/TableGen/JSONBackend.cpp @@ -129,13 +129,13 @@ void JSONEmitter::run(raw_ostream &OS) { // construct the array for each one. std::map<std::string, json::Array> instance_lists; for (const auto &C : Records.getClasses()) { - auto &Name = C.second->getNameInitAsString(); + const auto Name = C.second->getNameInitAsString(); (void)instance_lists[Name]; } // Main iteration over the defs. for (const auto &D : Records.getDefs()) { - auto &Name = D.second->getNameInitAsString(); + const auto Name = D.second->getNameInitAsString(); auto &Def = *D.second; json::Object obj; diff --git a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp index 75a99e95541a..6b899a049e6b 100644 --- a/contrib/llvm-project/llvm/lib/TableGen/Record.cpp +++ b/contrib/llvm-project/llvm/lib/TableGen/Record.cpp @@ -2424,6 +2424,14 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const { if (PrintSem) OS << ";\n"; } +void Record::updateClassLoc(SMLoc Loc) { + assert(Locs.size() == 1); + ForwardDeclarationLocs.push_back(Locs.front()); + + Locs.clear(); + Locs.push_back(Loc); +} + void Record::checkName() { // Ensure the record name has string type. const TypedInit *TypedName = cast<const TypedInit>(Name); diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp index acf93dc3d792..aab1802c5348 100644 --- a/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp +++ b/contrib/llvm-project/llvm/lib/TableGen/TGParser.cpp @@ -3391,6 +3391,8 @@ bool TGParser::ParseClass() { !CurRec->getTemplateArgs().empty()) return TokError("Class '" + CurRec->getNameInitAsString() + "' already defined"); + + CurRec->updateClassLoc(Lex.getLoc()); } else { // If this is the first reference to this class, create and add it. 
auto NewRec = diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index b332e9dcb176..8fb5d49e2121 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -216,7 +216,7 @@ def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128", "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">; def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address", - "IsStoreAddressAscend", "false", + "IsStoreAddressAscend", "true", "Schedule vector stores by ascending address">; def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow", diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 82fe5772c99d..00621b84d2f2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -69,6 +69,7 @@ public: bool tryMLAV64LaneV128(SDNode *N); bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N); bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); + bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift); bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { @@ -893,6 +894,30 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, return isWorthFolding(N); } +/// SelectArithUXTXRegister - Select a "UXTX register" operand. 
This +/// operand is refered by the instructions have SP operand +bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg, + SDValue &Shift) { + unsigned ShiftVal = 0; + AArch64_AM::ShiftExtendType Ext; + + if (N.getOpcode() != ISD::SHL) + return false; + + ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!CSD) + return false; + ShiftVal = CSD->getZExtValue(); + if (ShiftVal > 4) + return false; + + Ext = AArch64_AM::UXTX; + Reg = N.getOperand(0); + Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), + MVT::i32); + return isWorthFolding(N); +} + /// If there's a use of this ADDlow that's not itself a load/store then we'll /// need to create a real ADD instruction from it anyway and there's no point in /// folding it into the mem op. Theoretically, it shouldn't matter, but there's @@ -4049,6 +4074,24 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } break; } + case Intrinsic::swift_async_context_addr: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64); + SDValue Res = SDValue( + CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP, + CurDAG->getTargetConstant(8, DL, MVT::i32), + CurDAG->getTargetConstant(0, DL, MVT::i32)), + 0); + ReplaceUses(SDValue(Node, 0), Res); + ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1)); + CurDAG->RemoveDeadNode(Node); + + auto &MF = CurDAG->getMachineFunction(); + MF.getFrameInfo().setFrameAddressIsTaken(true); + MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); + return; + } } } break; case ISD::INTRINSIC_WO_CHAIN: { @@ -4094,18 +4137,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { if (tryMULLV64LaneV128(IntNo, Node)) return; break; - case Intrinsic::swift_async_context_addr: { - SDLoc DL(Node); - CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - AArch64::FP, MVT::i64), - 
CurDAG->getTargetConstant(8, DL, MVT::i32), - CurDAG->getTargetConstant(0, DL, MVT::i32)); - auto &MF = CurDAG->getMachineFunction(); - MF.getFrameInfo().setFrameAddressIsTaken(true); - MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); - return; - } } break; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 447ad10ddf22..e070ce2efa6b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -521,6 +521,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i64, Custom); setOperationAction(ISD::CTPOP, MVT::i128, Custom); + setOperationAction(ISD::PARITY, MVT::i64, Custom); + setOperationAction(ISD::PARITY, MVT::i128, Custom); + setOperationAction(ISD::ABS, MVT::i32, Custom); setOperationAction(ISD::ABS, MVT::i64, Custom); @@ -5463,7 +5466,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::SRA_PARTS: return LowerShiftParts(Op, DAG); case ISD::CTPOP: - return LowerCTPOP(Op, DAG); + case ISD::PARITY: + return LowerCTPOP_PARITY(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::OR: @@ -7783,7 +7787,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, return BitCast(VT, BSP, DAG); } -SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op, + SelectionDAG &DAG) const { if (DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat)) return SDValue(); @@ -7791,6 +7796,8 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasNEON()) return SDValue(); + bool IsParity = Op.getOpcode() == ISD::PARITY; + // While there is no integer popcount instruction, it can // be more efficiently lowered 
to the following sequence that uses // AdvSIMD registers/instructions as long as the copies to/from @@ -7813,6 +7820,10 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); + if (IsParity) + UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV, + DAG.getConstant(1, DL, MVT::i32)); + if (VT == MVT::i64) UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); return UaddLV; @@ -7824,9 +7835,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop); + if (IsParity) + UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV, + DAG.getConstant(1, DL, MVT::i32)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV); } + assert(!IsParity && "ISD::PARITY of vector types not supported"); + if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU); @@ -11811,6 +11828,12 @@ bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { isConcatMask(M, VT, VT.getSizeInBits() == 128)); } +bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M, + EVT VT) const { + // Just delegate to the generic legality, clear masks aren't special. + return isShuffleMaskLegal(M, VT); +} + /// getVShiftImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. 
@@ -11969,6 +11992,11 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, if (IsZero) return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS); return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS); + case AArch64CC::LE: + if (!NoNans) + return SDValue(); + // If we ignore NaNs then we can use to the LS implementation. + LLVM_FALLTHROUGH; case AArch64CC::LS: if (IsZero) return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS); @@ -12073,7 +12101,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, bool ShouldInvert; changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; + bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG); if (!Cmp.getNode()) @@ -13587,21 +13615,50 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { bool AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { - N = N->getOperand(0).getNode(); + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + + SDValue ShiftLHS = N->getOperand(0); EVT VT = N->getValueType(0); - // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine - // it with shift to let it be lowered to UBFX. - if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && - isa<ConstantSDNode>(N->getOperand(1))) { - uint64_t TruncMask = N->getConstantOperandVal(1); + + // If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine + // it with shift 'N' to let it be lowered to UBFX. 
+ if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) && + isa<ConstantSDNode>(ShiftLHS.getOperand(1))) { + uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1); if (isMask_64(TruncMask) && - N->getOperand(0).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0)->getOperand(1))) + ShiftLHS.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1))) return false; } return true; } +bool AArch64TargetLowering::isDesirableToCommuteXorWithShift( + const SDNode *N) const { + assert(N->getOpcode() == ISD::XOR && + (N->getOperand(0).getOpcode() == ISD::SHL || + N->getOperand(0).getOpcode() == ISD::SRL) && + "Expected XOR(SHIFT) pattern"); + + // Only commute if the entire NOT mask is a hidden shifted mask. + auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); + if (XorC && ShiftC) { + unsigned MaskIdx, MaskLen; + if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) { + unsigned ShiftAmt = ShiftC->getZExtValue(); + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + if (N->getOperand(0).getOpcode() == ISD::SHL) + return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt); + return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt); + } + } + + return false; +} + bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && @@ -19221,6 +19278,41 @@ static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv); } +static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + SDValue Insert = N->getOperand(0); + if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR) + return SDValue(); + + if (!Insert.getOperand(0).isUndef()) + return SDValue(); + + uint64_t IdxInsert = Insert.getConstantOperandVal(2); + uint64_t IdxDupLane = 
N->getConstantOperandVal(1); + if (IdxInsert != IdxDupLane) + return SDValue(); + + SDValue Bitcast = Insert.getOperand(1); + if (Bitcast.getOpcode() != ISD::BITCAST) + return SDValue(); + + SDValue Subvec = Bitcast.getOperand(0); + EVT SubvecVT = Subvec.getValueType(); + if (!SubvecVT.is128BitVector()) + return SDValue(); + EVT NewSubvecVT = + getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType()); + + SDLoc DL(N); + SDValue NewInsert = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT, + DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2)); + SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT, + NewInsert, N->getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -19307,6 +19399,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performCSELCombine(N, DCI, DAG); case AArch64ISD::DUP: return performDUPCombine(N, DCI); + case AArch64ISD::DUPLANE128: + return performDupLane128Combine(N, DAG); case AArch64ISD::NVCAST: return performNVCASTCombine(N); case AArch64ISD::SPLICE: @@ -19981,7 +20075,8 @@ void AArch64TargetLowering::ReplaceNodeResults( return; case ISD::CTPOP: - if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG)) + case ISD::PARITY: + if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG)) Results.push_back(Result); return; case AArch64ISD::SADDV: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h index e02b5e56fd2e..1ba2e2f315ec 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -549,6 +549,10 @@ public: /// should be stack expanded. bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; + /// Similar to isShuffleMaskLegal. 
Return true is the given 'select with zero' + /// shuffle mask can be codegen'd directly. + bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; + /// Return the ISD::SETCC ValueType. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -653,6 +657,9 @@ public: bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; + /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. + bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; + /// Return true if it is profitable to fold a pair of shifts into a mask. bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; @@ -995,7 +1002,7 @@ private: SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 02fa36a1df4b..e70d304f37b9 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1168,6 +1168,8 @@ def gi_arith_extended_reg32to64_i64 : GIComplexOperandMatcher<s64, "selectArithExtendedRegister">, GIComplexPatternEquiv<arith_extended_reg32to64_i64>; +def arith_uxtx : ComplexPattern<i64, 2, "SelectArithUXTXRegister", []>; + // Floating-point immediate. 
def fpimm16XForm : SDNodeXForm<fpimm, [{ @@ -1234,6 +1236,10 @@ def fpimm0 : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+0.0); }]>; +def fpimm_minus0 : FPImmLeaf<fAny, [{ + return Imm.isExactlyValue(-0.0); +}]>; + def fpimm_half : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+0.5); }]>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d444223e4494..a7b7e5270888 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1691,6 +1691,11 @@ def : InstAlias<"mov $dst, $src", defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; +def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ + return N->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; +}]>; + // Use SUBS instead of SUB to enable CSE between SUBS and SUB. def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; @@ -1709,6 +1714,8 @@ def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; +def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), + (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; } // Because of the immediate format for add/sub-imm instructions, the @@ -5293,6 +5300,9 @@ def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), // CodeGen patterns for addhn and subhn instructions, which can actually be // written in LLVM IR without too much difficulty. +// Prioritize ADDHN and SUBHN over UZP2. 
+let AddedComplexity = 10 in { + // ADDHN def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; @@ -5343,6 +5353,8 @@ def : Pat<(concat_vectors (v2i32 V64:$Rd), (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), V128:$Rn, V128:$Rm)>; +} // AddedComplexity = 10 + //---------------------------------------------------------------------------- // AdvSIMD bitwise extract from vector instruction. //---------------------------------------------------------------------------- @@ -5409,6 +5421,19 @@ def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), (v2i32 (trunc (v2i64 V128:$Vm))))), (UZP1v4i32 V128:$Vn, V128:$Vm)>; +def : Pat<(v16i8 (concat_vectors + (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))), + (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))), + (UZP2v16i8 V128:$Vn, V128:$Vm)>; +def : Pat<(v8i16 (concat_vectors + (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))), + (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))), + (UZP2v8i16 V128:$Vn, V128:$Vm)>; +def : Pat<(v4i32 (concat_vectors + (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))), + (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))), + (UZP2v4i32 V128:$Vn, V128:$Vm)>; + //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp index 6c8845ee8598..79866c9b0a05 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp @@ -22,8 +22,8 @@ static bool needReorderStoreMI(const MachineInstr *MI) { return false; case AArch64::STURQi: case AArch64::STRQui: - if 
(MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend()) - return false; + if (!MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend()) + return false; LLVM_FALLTHROUGH; case AArch64::STPQi: return AArch64InstrInfo::getLdStOffsetOp(*MI).isImm(); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c66f9cfd9c22..4032c4667bc7 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -278,10 +278,18 @@ def AArch64scvtf_mt : SDNode<"AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU", SDT_AArch def AArch64fcvtzu_mt : SDNode<"AArch64ISD::FCVTZU_MERGE_PASSTHRU", SDT_AArch64FCVT>; def AArch64fcvtzs_mt : SDNode<"AArch64ISD::FCVTZS_MERGE_PASSTHRU", SDT_AArch64FCVT>; -def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; -def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; -def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; -def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; +def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, + [SDTCisVec<1>, SDTCVecEltisVT<1,i1>, SDTCisVec<3>, SDTCisSameNumEltsAs<1,3>]>; +def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; +def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; +def AArch64fadda_p_node : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; + +def AArch64fadda_p : PatFrags<(ops node:$op1, node:$op2, node:$op3), + [(AArch64fadda_p_node node:$op1, node:$op2, node:$op3), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f32 fpimm_minus0)))), + (AArch64fadda_p_node (SVEAllActive), node:$op2, + (vselect node:$op1, node:$op3, (splat_vector (f64 fpimm_minus0))))]>; def SDT_AArch64PTest : 
SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>; @@ -447,6 +455,16 @@ let Predicates = [HasSVEorSME] in { defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; + // zext(cmpeq(x, splat(0))) -> cnot(x) + def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive):$Pg), nxv16i8:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_B $Op2, $Pg, $Op2)>; + def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive):$Pg), nxv8i16:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_H $Op2, $Pg, $Op2)>; + def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive):$Pg), nxv4i32:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_S $Op2, $Pg, $Op2)>; + def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive):$Pg), nxv2i64:$Op2, (SVEDup0), SETEQ)))), + (CNOT_ZPmZ_D $Op2, $Pg, $Op2)>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; @@ -857,6 +875,16 @@ let Predicates = [HasSVEorSME] in { defm LD1RQ_W : sve_mem_ldqr_ss<0b10, "ld1rqw", Z_s, ZPR32, GPR64NoXZRshifted32>; defm LD1RQ_D : sve_mem_ldqr_ss<0b11, "ld1rqd", Z_d, ZPR64, GPR64NoXZRshifted64>; + let AddedComplexity = 1 in { + class LD1RQPat<ValueType vt1, ValueType vt2, SDPatternOperator op, Instruction load_instr, Instruction ptrue> : + Pat<(vt1 (op (vt1 (vector_insert_subvec (vt1 undef), (vt2 (load GPR64sp:$Xn)), (i64 0))), (i64 0))), + (load_instr (ptrue 31), GPR64sp:$Xn, 0)>; + } + def : LD1RQPat<nxv16i8, v16i8, AArch64duplane128, LD1RQ_B_IMM, PTRUE_B>; + def : LD1RQPat<nxv8i16, v8i16, AArch64duplane128, LD1RQ_H_IMM, PTRUE_H>; + def : 
LD1RQPat<nxv4i32, v4i32, AArch64duplane128, LD1RQ_W_IMM, PTRUE_S>; + def : LD1RQPat<nxv2i64, v2i64, AArch64duplane128, LD1RQ_D_IMM, PTRUE_D>; + // continuous load with reg+reg addressing. defm LD1B : sve_mem_cld_ss<0b0000, "ld1b", Z_b, ZPR8, GPR64NoXZRshifted8>; defm LD1B_H : sve_mem_cld_ss<0b0001, "ld1b", Z_h, ZPR16, GPR64NoXZRshifted8>; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 3f9795f5198b..47e4c6589c26 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -128,7 +128,7 @@ static cl::opt<bool> static cl::opt<bool> EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), - cl::init(false)); + cl::init(true)); static cl::opt<bool> BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), @@ -563,17 +563,6 @@ void AArch64PassConfig::addIRPasses() { addPass(createFalkorMarkStridedAccessesPass()); } - TargetPassConfig::addIRPasses(); - - addPass(createAArch64StackTaggingPass( - /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); - - // Match interleaved memory accesses to ldN/stN intrinsics. - if (TM->getOptLevel() != CodeGenOpt::None) { - addPass(createInterleavedLoadCombinePass()); - addPass(createInterleavedAccessPass()); - } - if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or @@ -587,6 +576,17 @@ void AArch64PassConfig::addIRPasses() { addPass(createLICMPass()); } + TargetPassConfig::addIRPasses(); + + addPass(createAArch64StackTaggingPass( + /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); + + // Match interleaved memory accesses to ldN/stN intrinsics. 
+ if (TM->getOptLevel() != CodeGenOpt::None) { + addPass(createInterleavedLoadCombinePass()); + addPass(createInterleavedAccessPass()); + } + // Add Control Flow Guard checks. if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 274a025e82a0..66617393c9ae 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" +#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <algorithm> using namespace llvm; using namespace llvm::PatternMatch; @@ -37,6 +38,74 @@ static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10), static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden); +class TailFoldingKind { +private: + uint8_t Bits = 0; // Currently defaults to disabled. + +public: + enum TailFoldingOpts { + TFDisabled = 0x0, + TFReductions = 0x01, + TFRecurrences = 0x02, + TFSimple = 0x80, + TFAll = TFReductions | TFRecurrences | TFSimple + }; + + void operator=(const std::string &Val) { + if (Val.empty()) + return; + SmallVector<StringRef, 6> TailFoldTypes; + StringRef(Val).split(TailFoldTypes, '+', -1, false); + for (auto TailFoldType : TailFoldTypes) { + if (TailFoldType == "disabled") + Bits = 0; + else if (TailFoldType == "all") + Bits = TFAll; + else if (TailFoldType == "default") + Bits = 0; // Currently defaults to never tail-folding. 
+ else if (TailFoldType == "simple") + add(TFSimple); + else if (TailFoldType == "reductions") + add(TFReductions); + else if (TailFoldType == "recurrences") + add(TFRecurrences); + else if (TailFoldType == "noreductions") + remove(TFReductions); + else if (TailFoldType == "norecurrences") + remove(TFRecurrences); + else { + errs() + << "invalid argument " << TailFoldType.str() + << " to -sve-tail-folding=; each element must be one of: disabled, " + "all, default, simple, reductions, noreductions, recurrences, " + "norecurrences\n"; + } + } + } + + operator uint8_t() const { return Bits; } + + void add(uint8_t Flag) { Bits |= Flag; } + void remove(uint8_t Flag) { Bits &= ~Flag; } +}; + +TailFoldingKind TailFoldingKindLoc; + +cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding( + "sve-tail-folding", + cl::desc( + "Control the use of vectorisation using tail-folding for SVE:" + "\ndisabled No loop types will vectorize using tail-folding" + "\ndefault Uses the default tail-folding settings for the target " + "CPU" + "\nall All legal loop types will vectorize using tail-folding" + "\nsimple Use tail-folding for simple loops (not reductions or " + "recurrences)" + "\nreductions Use tail-folding for loops containing reductions" + "\nrecurrences Use tail-folding for loops containing first order " + "recurrences"), + cl::location(TailFoldingKindLoc)); + bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); @@ -2955,3 +3024,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); } + +bool AArch64TTIImpl::preferPredicateOverEpilogue( + Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) { + if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) + return false; + + 
TailFoldingKind Required; // Defaults to 0. + if (LVL->getReductionVars().size()) + Required.add(TailFoldingKind::TFReductions); + if (LVL->getFirstOrderRecurrences().size()) + Required.add(TailFoldingKind::TFRecurrences); + if (!Required) + Required.add(TailFoldingKind::TFSimple); + + return (TailFoldingKindLoc & Required) == Required; +} diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 59ec91843266..2231f8705998 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -340,6 +340,11 @@ public: return PredicationStyle::None; } + bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, + AssumptionCache &AC, TargetLibraryInfo *TLI, + DominatorTree *DT, + LoopVectorizationLegality *LVL); + bool supportsScalableVectors() const { return ST->hasSVE(); } bool enableScalableVectorization() const { return ST->hasSVE(); } @@ -347,6 +352,11 @@ public: bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const; + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return ST->hasSVE(); + } + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f129bfe11e4d..3fe3b2a69855 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -231,7 +231,70 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) { {codeview::RegisterId::ARM64_Q29, AArch64::Q29}, 
{codeview::RegisterId::ARM64_Q30, AArch64::Q30}, {codeview::RegisterId::ARM64_Q31, AArch64::Q31}, - + {codeview::RegisterId::ARM64_B0, AArch64::B0}, + {codeview::RegisterId::ARM64_B1, AArch64::B1}, + {codeview::RegisterId::ARM64_B2, AArch64::B2}, + {codeview::RegisterId::ARM64_B3, AArch64::B3}, + {codeview::RegisterId::ARM64_B4, AArch64::B4}, + {codeview::RegisterId::ARM64_B5, AArch64::B5}, + {codeview::RegisterId::ARM64_B6, AArch64::B6}, + {codeview::RegisterId::ARM64_B7, AArch64::B7}, + {codeview::RegisterId::ARM64_B8, AArch64::B8}, + {codeview::RegisterId::ARM64_B9, AArch64::B9}, + {codeview::RegisterId::ARM64_B10, AArch64::B10}, + {codeview::RegisterId::ARM64_B11, AArch64::B11}, + {codeview::RegisterId::ARM64_B12, AArch64::B12}, + {codeview::RegisterId::ARM64_B13, AArch64::B13}, + {codeview::RegisterId::ARM64_B14, AArch64::B14}, + {codeview::RegisterId::ARM64_B15, AArch64::B15}, + {codeview::RegisterId::ARM64_B16, AArch64::B16}, + {codeview::RegisterId::ARM64_B17, AArch64::B17}, + {codeview::RegisterId::ARM64_B18, AArch64::B18}, + {codeview::RegisterId::ARM64_B19, AArch64::B19}, + {codeview::RegisterId::ARM64_B20, AArch64::B20}, + {codeview::RegisterId::ARM64_B21, AArch64::B21}, + {codeview::RegisterId::ARM64_B22, AArch64::B22}, + {codeview::RegisterId::ARM64_B23, AArch64::B23}, + {codeview::RegisterId::ARM64_B24, AArch64::B24}, + {codeview::RegisterId::ARM64_B25, AArch64::B25}, + {codeview::RegisterId::ARM64_B26, AArch64::B26}, + {codeview::RegisterId::ARM64_B27, AArch64::B27}, + {codeview::RegisterId::ARM64_B28, AArch64::B28}, + {codeview::RegisterId::ARM64_B29, AArch64::B29}, + {codeview::RegisterId::ARM64_B30, AArch64::B30}, + {codeview::RegisterId::ARM64_B31, AArch64::B31}, + {codeview::RegisterId::ARM64_H0, AArch64::H0}, + {codeview::RegisterId::ARM64_H1, AArch64::H1}, + {codeview::RegisterId::ARM64_H2, AArch64::H2}, + {codeview::RegisterId::ARM64_H3, AArch64::H3}, + {codeview::RegisterId::ARM64_H4, AArch64::H4}, + {codeview::RegisterId::ARM64_H5, 
AArch64::H5}, + {codeview::RegisterId::ARM64_H6, AArch64::H6}, + {codeview::RegisterId::ARM64_H7, AArch64::H7}, + {codeview::RegisterId::ARM64_H8, AArch64::H8}, + {codeview::RegisterId::ARM64_H9, AArch64::H9}, + {codeview::RegisterId::ARM64_H10, AArch64::H10}, + {codeview::RegisterId::ARM64_H11, AArch64::H11}, + {codeview::RegisterId::ARM64_H12, AArch64::H12}, + {codeview::RegisterId::ARM64_H13, AArch64::H13}, + {codeview::RegisterId::ARM64_H14, AArch64::H14}, + {codeview::RegisterId::ARM64_H15, AArch64::H15}, + {codeview::RegisterId::ARM64_H16, AArch64::H16}, + {codeview::RegisterId::ARM64_H17, AArch64::H17}, + {codeview::RegisterId::ARM64_H18, AArch64::H18}, + {codeview::RegisterId::ARM64_H19, AArch64::H19}, + {codeview::RegisterId::ARM64_H20, AArch64::H20}, + {codeview::RegisterId::ARM64_H21, AArch64::H21}, + {codeview::RegisterId::ARM64_H22, AArch64::H22}, + {codeview::RegisterId::ARM64_H23, AArch64::H23}, + {codeview::RegisterId::ARM64_H24, AArch64::H24}, + {codeview::RegisterId::ARM64_H25, AArch64::H25}, + {codeview::RegisterId::ARM64_H26, AArch64::H26}, + {codeview::RegisterId::ARM64_H27, AArch64::H27}, + {codeview::RegisterId::ARM64_H28, AArch64::H28}, + {codeview::RegisterId::ARM64_H29, AArch64::H29}, + {codeview::RegisterId::ARM64_H30, AArch64::H30}, + {codeview::RegisterId::ARM64_H31, AArch64::H31}, }; for (const auto &I : RegMap) MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg)); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td index 48b5814cd482..2d6f1438e315 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -585,6 +585,12 @@ def FeatureMAIInsts : SubtargetFeature<"mai-insts", "Has mAI instructions" >; +def FeatureFP8Insts : SubtargetFeature<"fp8-insts", + "HasFP8Insts", + "true", + "Has fp8 and bf8 instructions" +>; + def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst", 
"HasPkFmacF16Inst", "true", @@ -1124,6 +1130,7 @@ def FeatureISAVersion9_4_0 : FeatureSet< Feature64BitDPP, FeaturePackedFP32Ops, FeatureMAIInsts, + FeatureFP8Insts, FeaturePkFmacF16Inst, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, @@ -1265,11 +1272,14 @@ def FeatureISAVersion11_Common : FeatureSet< FeaturePackedTID, FeatureVcmpxPermlaneHazard]>; -// Features for GFX 11.0.0 and 11.0.1 -def FeatureISAVersion11_0 : FeatureSet< +def FeatureISAVersion11_0_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureUserSGPRInit16Bug])>; +def FeatureISAVersion11_0_1 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [])>; + def FeatureISAVersion11_0_2 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureUserSGPRInit16Bug])>; @@ -1704,6 +1714,9 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">, def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">, AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>; +def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">, + AssemblerPredicate<(all_of FeatureFP8Insts)>; + def HasPkFmacF16Inst : Predicate<"Subtarget->hasPkFmacF16Inst()">, AssemblerPredicate<(all_of FeaturePkFmacF16Inst)>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp index d28f38e42430..d361e33995cf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -74,6 +74,7 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const { << " WorkGroupIDY: " << FI.second.WorkGroupIDY << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ << " WorkGroupInfo: " << FI.second.WorkGroupInfo + << " LDSKernelId: " << FI.second.LDSKernelId << " PrivateSegmentWaveByteOffset: " << FI.second.PrivateSegmentWaveByteOffset << " ImplicitBufferPtr: " << 
FI.second.ImplicitBufferPtr @@ -107,6 +108,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue( case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); + case AMDGPUFunctionArgInfo::LDS_KERNEL_ID: + return std::make_tuple(LDSKernelId ? &LDSKernelId : nullptr, + &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: return std::make_tuple( PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr, @@ -162,6 +166,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() { AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12); AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13); AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14); + AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15); const unsigned Mask = 0x3ff; AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index e9ed45d8cd14..f595e469f998 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -103,6 +103,7 @@ struct AMDGPUFunctionArgInfo { KERNARG_SEGMENT_PTR = 3, DISPATCH_ID = 4, FLAT_SCRATCH_INIT = 5, + LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI WORKGROUP_ID_X = 10, WORKGROUP_ID_Y = 11, WORKGROUP_ID_Z = 12, @@ -128,6 +129,7 @@ struct AMDGPUFunctionArgInfo { ArgDescriptor DispatchID; ArgDescriptor FlatScratchInit; ArgDescriptor PrivateSegmentSize; + ArgDescriptor LDSKernelId; // System SGPRs in kernels. 
ArgDescriptor WorkGroupIDX; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 57a4660bc1eb..13a65f1ad601 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -27,8 +27,10 @@ #include "SIMachineFunctionInfo.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -415,6 +417,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; } + if (CurrentProgramInfo.DynamicCallStack) { + KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK; + } + return KernelCodeProperties; } @@ -506,6 +512,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { emitFunctionBody(); + emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(), + STM.hasMAIInsts()); + if (isVerbose()) { MCSectionELF *CommentSection = Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0); @@ -875,6 +884,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, LDSAlignShift = 9; } + ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs(); + ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs(); + ProgInfo.LDSSize = MFI->getLDSSize(); ProgInfo.LDSBlocks = alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift; @@ -1180,3 +1192,58 @@ void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AMDGPUResourceUsageAnalysis>(); AsmPrinter::getAnalysisUsage(AU); } + +void AMDGPUAsmPrinter::emitResourceUsageRemarks( + const MachineFunction &MF, const 
SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts) { + if (!ORE) + return; + + const char *Name = "kernel-resource-usage"; + const char *Indent = " "; + + // If the remark is not specifically enabled, do not output to yaml + LLVMContext &Ctx = MF.getFunction().getContext(); + if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name)) + return; + + auto EmitResourceUsageRemark = [&](StringRef RemarkName, + StringRef RemarkLabel, auto Argument) { + // Add an indent for every line besides the line with the kernel name. This + // makes it easier to tell which resource usage go with which kernel since + // the kernel name will always be displayed first. + std::string LabelStr = RemarkLabel.str() + ": "; + if (!RemarkName.equals("FunctionName")) + LabelStr = Indent + LabelStr; + + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(Name, RemarkName, + MF.getFunction().getSubprogram(), + &MF.front()) + << LabelStr << ore::NV(RemarkName, Argument); + }); + }; + + // FIXME: Formatting here is pretty nasty because clang does not accept + // newlines from diagnostics. This forces us to emit multiple diagnostic + // remarks to simulate newlines. If and when clang does accept newlines, this + // formatting should be aggregated into one remark with newlines to avoid + // printing multiple diagnostic location and diag opts. 
+ EmitResourceUsageRemark("FunctionName", "Function Name", + MF.getFunction().getName()); + EmitResourceUsageRemark("NumSGPR", "SGPRs", CurrentProgramInfo.NumSGPR); + EmitResourceUsageRemark("NumVGPR", "VGPRs", CurrentProgramInfo.NumArchVGPR); + if (hasMAIInsts) + EmitResourceUsageRemark("NumAGPR", "AGPRs", CurrentProgramInfo.NumAccVGPR); + EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", + CurrentProgramInfo.ScratchSize); + EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", + CurrentProgramInfo.Occupancy); + EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", + CurrentProgramInfo.SGPRSpill); + EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill", + CurrentProgramInfo.VGPRSpill); + if (isModuleEntryFunction) + EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]", + CurrentProgramInfo.LDSSize); +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index ddda2cf107b1..2881b8d7bcca 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -69,6 +69,9 @@ private: uint64_t ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction* MFI); + void emitResourceUsageRemarks(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo, + bool isModuleEntryFunction, bool hasMAIInsts); uint16_t getAmdhsaKernelCodeProperties( const MachineFunction &MF) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def index 0a2cf3874245..c7a060c5db5b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def @@ -27,5 +27,6 @@ AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z") AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x") AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, 
"amdgpu-no-workitem-id-y") AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z") +AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id") #undef AMDGPU_ATTRIBUTE diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 8de0d7e6bff1..a3634d2440c3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -72,6 +72,8 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, case Intrinsic::amdgcn_workgroup_id_z: case Intrinsic::r600_read_tgid_z: return WORKGROUP_ID_Z; + case Intrinsic::amdgcn_lds_kernel_id: + return LDS_KERNEL_ID; case Intrinsic::amdgcn_dispatch_ptr: return DISPATCH_PTR; case Intrinsic::amdgcn_dispatch_id: @@ -457,6 +459,10 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { removeAssumedBits(QUEUE_PTR); } + if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) { + removeAssumedBits(LDS_KERNEL_ID); + } + return getAssumed() != OrigAssumed ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -591,6 +597,16 @@ private: return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this, UsedAssumedInformation); } + + bool funcRetrievesLDSKernelId(Attributor &A) { + auto DoesNotRetrieve = [&](Instruction &I) { + auto &Call = cast<CallBase>(I); + return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id; + }; + bool UsedAssumedInformation = false; + return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this, + UsedAssumedInformation); + } }; AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP, @@ -743,7 +759,8 @@ public: AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM); DenseSet<const char *> Allowed( {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, - &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, &AAPointerInfo::ID}); + &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID, + &AAPointerInfo::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index fd812eb676ef..4550cfdcf883 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -764,7 +764,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, AMDGPUFunctionArgInfo::DISPATCH_ID, AMDGPUFunctionArgInfo::WORKGROUP_ID_X, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Z + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID, }; static constexpr StringLiteral ImplicitAttrNames[] = { @@ -774,7 +775,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, "amdgpu-no-dispatch-id", "amdgpu-no-workgroup-id-x", "amdgpu-no-workgroup-id-y", - "amdgpu-no-workgroup-id-z" + "amdgpu-no-workgroup-id-z", + "amdgpu-no-lds-kernel-id", }; 
MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -810,6 +812,14 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy); } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) { LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional<uint32_t> Id = + AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction()); + if (Id.has_value()) { + MIRBuilder.buildConstant(InputReg, Id.value()); + } else { + MIRBuilder.buildUndef(InputReg); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 5747fc0ca8e6..229dfb62ef6e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -88,6 +88,10 @@ def gi_smrd_sgpr : GIComplexOperandMatcher<s64, "selectSmrdSgpr">, GIComplexPatternEquiv<SMRDSgpr>; +def gi_smrd_sgpr_imm : + GIComplexOperandMatcher<s64, "selectSmrdSgprImm">, + GIComplexPatternEquiv<SMRDSgprImm>; + def gi_flat_offset : GIComplexOperandMatcher<s64, "selectFlatOffset">, GIComplexPatternEquiv<FlatOffset>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 6fa44ffcbfaa..632a76b32009 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -875,6 +875,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); + 
Kern[".uses_dynamic_stack"] = + Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack); // FIXME: The metadata treats the minimum as 16? Kern[".kernarg_segment_align"] = diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 589992c7a7ec..147c8850587e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -33,7 +33,7 @@ #include "llvm/IR/Dominators.h" #endif -#define DEBUG_TYPE "isel" +#define DEBUG_TYPE "amdgpu-isel" using namespace llvm; @@ -1886,21 +1886,21 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, // Match an immediate (if Imm is true) or an SGPR (if Imm is false) // offset. If Imm32Only is true, match only 32-bit immediate offsets // available on CI. -bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, - SDValue &Offset, bool Imm, +bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode, + SDValue *SOffset, SDValue *Offset, bool Imm32Only) const { ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); if (!C) { - if (Imm) + if (!SOffset) return false; if (ByteOffsetNode.getValueType().isScalarInteger() && ByteOffsetNode.getValueType().getSizeInBits() == 32) { - Offset = ByteOffsetNode; + *SOffset = ByteOffsetNode; return true; } if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { - Offset = ByteOffsetNode.getOperand(0); + *SOffset = ByteOffsetNode.getOperand(0); return true; } } @@ -1912,8 +1912,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, int64_t ByteOffset = C->getSExtValue(); Optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); - if (EncodedOffset && Imm && !Imm32Only) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + if 
(EncodedOffset && Offset && !Imm32Only) { + *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } @@ -1922,17 +1922,17 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, return false; EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); - if (EncodedOffset && Imm32Only) { - Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); + if (EncodedOffset && Offset && Imm32Only) { + *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); return true; } if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) return false; - if (!Imm) { + if (SOffset) { SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); - Offset = SDValue( + *SOffset = SDValue( CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); return true; } @@ -1968,11 +1968,18 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { // Match a base and an immediate (if Imm is true) or an SGPR // (if Imm is false) offset. If Imm32Only is true, match only 32-bit // immediate offsets available on CI. -bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, - SDValue &Offset, bool Imm, - bool Imm32Only) const { +bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, + SDValue *SOffset, SDValue *Offset, + bool Imm32Only) const { SDLoc SL(Addr); + if (SOffset && Offset) { + assert(!Imm32Only); + SDValue B; + return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) && + SelectSMRDBaseOffset(B, SBase, SOffset, nullptr); + } + // A 32-bit (address + offset) should not cause unsigned 32-bit integer // wraparound, because s_load instructions perform the addition in 64 bits. 
if ((Addr.getValueType() != MVT::i32 || @@ -1987,34 +1994,55 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, assert(N0 && N1 && isa<ConstantSDNode>(N1)); } if (N0 && N1) { - if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) { - SBase = Expand32BitAddress(N0); + if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) { + SBase = N0; + return true; + } + if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) { + SBase = N1; return true; } } return false; } - if (!Imm) + if (Offset && !SOffset) { + SBase = Addr; + *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + return true; + } + return false; +} + +bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, + SDValue *SOffset, SDValue *Offset, + bool Imm32Only) const { + if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) return false; - SBase = Expand32BitAddress(Addr); - Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + SBase = Expand32BitAddress(SBase); return true; } bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const { - return SelectSMRD(Addr, SBase, Offset, /* Imm */ true); + return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset); } bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const { assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); - return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true); + return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset, + /* Imm32Only */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, - SDValue &Offset) const { - return SelectSMRD(Addr, SBase, Offset, /* Imm */ false); + SDValue &SOffset) const { + return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, + SDValue &SOffset, + SDValue &Offset) const { + return SelectSMRD(Addr, SBase, &SOffset, &Offset); } bool 
AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 7894b8eb5b67..fda2bfac71fc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -193,14 +193,18 @@ private: bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &SAddr, SDValue &Offset) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm, - bool Imm32Only) const; + bool SelectSMRDOffset(SDValue Base, SDValue ByteOffsetNode, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; SDValue Expand32BitAddress(SDValue Addr) const; - bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm, - bool Imm32Only = false) const; + bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; + bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; - bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const; + bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset, + SDValue &Offset) const; bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 70fae9d784a2..f2e5c2fe00e8 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1006,6 +1006,14 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: return selectSMFMACIntrin(I); default: return selectImpl(I, *CoverageInfo); @@ -2361,7 +2369,7 @@ void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD) return; - GEPInfo GEPInfo(*PtrMI); + GEPInfo GEPInfo; for (unsigned i = 1; i != 3; ++i) { const MachineOperand &GEPOp = PtrMI->getOperand(i); @@ -3237,6 +3245,8 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) return Register(); + assert(Def->getNumOperands() == 3 && + MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64)); if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { return Def->getOperand(1).getReg(); } @@ -3354,6 +3364,30 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64; break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64; + break; + 
case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64; + break; + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: + Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64; + break; default: llvm_unreachable("unhandled smfmac intrinsic"); } @@ -3800,25 +3834,82 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const { }}; } -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { +bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, + Register &Base, + Register *SOffset, + int64_t *Offset) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + + // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, + // then we can select all ptr + 32-bit offsets. 
SmallVector<GEPInfo, 4> AddrInfo; - getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); + getAddrModeInfo(*MI, *MRI, AddrInfo); - if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) - return None; + if (AddrInfo.empty()) + return false; - const GEPInfo &GEPInfo = AddrInfo[0]; + const GEPInfo &GEPI = AddrInfo[0]; Optional<int64_t> EncodedImm = - AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false); - if (!EncodedImm) + AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false); + + if (SOffset && Offset) { + if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm && + AddrInfo.size() > 1) { + const GEPInfo &GEPI2 = AddrInfo[1]; + if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) { + if (Register OffsetReg = + matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) { + Base = GEPI2.SgprParts[0]; + *SOffset = OffsetReg; + *Offset = *EncodedImm; + return true; + } + } + } + return false; + } + + if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { + Base = GEPI.SgprParts[0]; + *Offset = *EncodedImm; + return true; + } + + // SGPR offset is unsigned. + if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) && + GEPI.Imm != 0) { + // If we make it this far we have a load with an 32-bit immediate offset. + // It is OK to select this using a sgpr offset, because we have already + // failed trying to select this load into one of the _IMM variants since + // the _IMM Patterns are considered before the _SGPR patterns. 
+ Base = GEPI.SgprParts[0]; + *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset) + .addImm(GEPI.Imm); + return true; + } + + if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) { + if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) { + Base = GEPI.SgprParts[0]; + *SOffset = OffsetReg; + return true; + } + } + + return false; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { + Register Base; + int64_t Offset; + if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset)) return None; - unsigned PtrReg = GEPInfo.SgprParts[0]; - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } - }}; + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; } InstructionSelector::ComplexRendererFns @@ -3844,43 +3935,24 @@ AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { - MachineInstr *MI = Root.getParent(); - MachineBasicBlock *MBB = MI->getParent(); - - SmallVector<GEPInfo, 4> AddrInfo; - getAddrModeInfo(*MI, *MRI, AddrInfo); - - // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, - // then we can select all ptr + 32-bit offsets. - if (AddrInfo.empty()) + Register Base, SOffset; + if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr)) return None; - const GEPInfo &GEPInfo = AddrInfo[0]; - Register PtrReg = GEPInfo.SgprParts[0]; - - // SGPR offset is unsigned. - if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) && - GEPInfo.Imm != 0) { - // If we make it this far we have a load with an 32-bit immediate offset. 
- // It is OK to select this using a sgpr offset, because we have already - // failed trying to select this load into one of the _IMM variants since - // the _IMM Patterns are considered before the _SGPR patterns. - Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg) - .addImm(GEPInfo.Imm); - return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; - } + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}}; +} - if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) { - if (Register OffsetReg = - matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) { - return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, - [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}}; - } - } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const { + Register Base, SOffset; + int64_t Offset; + if (!selectSmrdOffset(Root, Base, &SOffset, &Offset)) + return None; - return None; + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; } std::pair<Register, int> diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 22672ba59e76..5baf55d23480 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -63,11 +63,9 @@ public: private: struct GEPInfo { - const MachineInstr &GEP; SmallVector<unsigned, 2> SgprParts; SmallVector<unsigned, 2> VgprParts; - int64_t Imm; - GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { } + int64_t Imm = 0; }; bool 
isSGPR(Register Reg) const; @@ -200,12 +198,16 @@ private: InstructionSelector::ComplexRendererFns selectVINTERPModsHi(MachineOperand &Root) const; + bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset, + int64_t *Offset) const; InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectSmrdImm32(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectSmrdSgpr(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectSmrdSgprImm(MachineOperand &Root) const; std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root, uint64_t FlatVariant) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 01a3e78ea48c..0979debe9777 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4197,6 +4197,35 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + Function &F = B.getMF().getFunction(); + Optional<uint32_t> KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.has_value()) + B.buildConstant(DstReg, KnownSize.value()); + return false; +} + +bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>(); + if (!MFI->isEntryFunction()) { + return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } + + Register DstReg = MI.getOperand(0).getReg(); + if (!getLDSKernelId(DstReg, MRI, B)) + return false; + + MI.eraseFromParent(); + return true; +} + bool 
AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, @@ -5636,6 +5665,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_workgroup_id_z: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: + return legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); case Intrinsic::amdgcn_dispatch_ptr: return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::DISPATCH_PTR); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index cee533aa34ec..5e8111e22aad 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -155,6 +155,13 @@ public: bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + + bool getLDSKernelId(Register DstReg, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + + bool legalizeLDSKernelId(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 78e092b2e872..7e49a6117ebd 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -376,15 +376,7 @@ static bool HasNative(AMDGPULibFunc::EFuncId id) { return false; } -struct TableRef { - size_t size; - const TableEntry *table; // variable size: from 0 to (size - 1) - - TableRef() : size(0), table(nullptr) {} - - template <size_t N> - TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {} -}; 
+using TableRef = ArrayRef<TableEntry>; static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { switch(id) { @@ -698,11 +690,10 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { // Table-Driven optimization const TableRef tr = getOptTable(FInfo.getId()); - if (tr.size==0) + if (tr.empty()) return false; - int const sz = (int)tr.size; - const TableEntry * const ftbl = tr.table; + int const sz = (int)tr.size(); Value *opr0 = CI->getArgOperand(0); if (getVecSize(FInfo) > 1) { @@ -714,8 +705,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { assert(eltval && "Non-FP arguments in math function!"); bool found = false; for (int i=0; i < sz; ++i) { - if (eltval->isExactlyValue(ftbl[i].input)) { - DVal.push_back(ftbl[i].result); + if (eltval->isExactlyValue(tr[i].input)) { + DVal.push_back(tr[i].result); found = true; break; } @@ -746,8 +737,8 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { // Scalar version if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { for (int i = 0; i < sz; ++i) { - if (CF->isExactlyValue(ftbl[i].input)) { - Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result); + if (CF->isExactlyValue(tr[i].input)) { + Value *nval = ConstantFP::get(CF->getType(), tr[i].result); LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); replaceCall(nval); return true; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 35922341de26..b4a8766d682e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -55,21 +55,6 @@ static cl::opt<bool> SuperAlignLDSGlobals( cl::init(true), cl::Hidden); namespace { - -SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) { - SmallPtrSet<GlobalValue *, 32> UsedList; - 
- SmallVector<GlobalValue *, 32> TmpVec; - collectUsedGlobalVariables(M, TmpVec, true); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - TmpVec.clear(); - collectUsedGlobalVariables(M, TmpVec, false); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - return UsedList; -} - class AMDGPULowerModuleLDS : public ModulePass { static void removeFromUsedList(Module &M, StringRef Name, @@ -153,9 +138,6 @@ class AMDGPULowerModuleLDS : public ModulePass { ""); } -private: - SmallPtrSet<GlobalValue *, 32> UsedList; - public: static char ID; @@ -165,9 +147,10 @@ public: bool runOnModule(Module &M) override { CallGraph CG = CallGraph(M); - UsedList = getUsedList(M); bool Changed = superAlignLDSGlobals(M); - Changed |= processUsedLDS(CG, M); + std::vector<GlobalVariable *> ModuleScopeVariables = + AMDGPU::findVariablesToLower(M, nullptr); + Changed |= processUsedLDS(CG, M, ModuleScopeVariables); for (Function &F : M.functions()) { if (F.isDeclaration()) @@ -176,10 +159,11 @@ public: // Only lower compute kernels' LDS. if (!AMDGPU::isKernel(F.getCallingConv())) continue; - Changed |= processUsedLDS(CG, M, &F); + std::vector<GlobalVariable *> KernelUsedVariables = + AMDGPU::findVariablesToLower(M, &F); + Changed |= processUsedLDS(CG, M, KernelUsedVariables, &F); } - UsedList.clear(); return Changed; } @@ -228,22 +212,20 @@ private: return Changed; } - bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) { + bool processUsedLDS(CallGraph const &CG, Module &M, + std::vector<GlobalVariable *> const &LDSVarsToTransform, + Function *F = nullptr) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); - // Find variables to move into new struct instance - std::vector<GlobalVariable *> FoundLocalVars = - AMDGPU::findVariablesToLower(M, F); - - if (FoundLocalVars.empty()) { + if (LDSVarsToTransform.empty()) { // No variables to rewrite, no changes made. 
return false; } SmallVector<OptimizedStructLayoutField, 8> LayoutFields; - LayoutFields.reserve(FoundLocalVars.size()); - for (GlobalVariable *GV : FoundLocalVars) { + LayoutFields.reserve(LDSVarsToTransform.size()); + for (GlobalVariable *GV : LDSVarsToTransform) { OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()), AMDGPU::getAlign(DL, GV)); LayoutFields.emplace_back(F); @@ -252,7 +234,7 @@ private: performOptimizedStructLayout(LayoutFields); std::vector<GlobalVariable *> LocalVars; - LocalVars.reserve(FoundLocalVars.size()); // will be at least this large + LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large { // This usually won't need to insert any padding, perhaps avoid the alloc uint64_t CurrentOffset = 0; @@ -352,7 +334,6 @@ private: GV->replaceAllUsesWith(GEP); } if (GV->use_empty()) { - UsedList.erase(GV); GV->eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index b461c3c4bfdc..f5e12fd960d0 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -11,6 +11,7 @@ #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -101,6 +102,21 @@ void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { } } +Optional<uint32_t> +AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { + auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); + if (MD && MD->getNumOperands() == 1) { + ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); + if (KnownSize) { + uint64_t V = KnownSize->getZExtValue(); + if (V <= UINT32_MAX) { + return V; + } + } + } + return {}; +} + void 
AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV) { assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h index df62c2314617..97db8b7eb8d6 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -11,11 +11,12 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Function.h" namespace llvm { @@ -104,6 +105,8 @@ public: unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV); void allocateModuleLDSGlobal(const Function &F); + static Optional<uint32_t> getLDSKernelIdMetadata(const Function &F); + Align getDynLDSAlign() const { return DynLDSAlign; } void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 09dbd2150db6..a9f1e9bd0996 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -74,10 +74,10 @@ public: private: struct MemAccessInfo { - const Value *V; - const Value *Base; - int64_t Offset; - MemAccessInfo() : V(nullptr), Base(nullptr), Offset(0) {} + const Value *V = nullptr; + const Value *Base = nullptr; + int64_t Offset = 0; + MemAccessInfo() = default; bool isLargeStride(MemAccessInfo &Reference) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) Printable print() const { @@ -116,6 +116,7 @@ private: bool isGlobalAddr(const Value *V) 
const; bool isLocalAddr(const Value *V) const; + bool isGlobalLoadUsedInBB(const Instruction &) const; }; static std::pair<const Value *, const Type *> getMemoryInstrPtrAndType( @@ -196,6 +197,24 @@ bool AMDGPUPerfHint::isIndirectAccess(const Instruction *Inst) const { return false; } +// Returns true if the global load `I` is used in its own basic block. +bool AMDGPUPerfHint::isGlobalLoadUsedInBB(const Instruction &I) const { + const auto *Ld = dyn_cast<LoadInst>(&I); + if (!Ld) + return false; + if (!isGlobalAddr(Ld->getPointerOperand())) + return false; + + for (const User *Usr : Ld->users()) { + if (const Instruction *UsrInst = dyn_cast<Instruction>(Usr)) { + if (UsrInst->getParent() == I.getParent()) + return true; + } + } + + return false; +} + AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { AMDGPUPerfHintAnalysis::FuncInfo &FI = FIM[&F]; @@ -203,9 +222,14 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { for (auto &B : F) { LastAccess = MemAccessInfo(); + unsigned UsedGlobalLoadsInBB = 0; for (auto &I : B) { if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) { unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32); + // TODO: Check if the global load and its user are close to each other + // instead (Or do this analysis in GCNSchedStrategy?). 
+ if (isGlobalLoadUsedInBB(I)) + UsedGlobalLoadsInBB += Size; if (isIndirectAccess(&I)) FI.IAMInstCost += Size; if (isLargeStride(&I)) @@ -245,6 +269,16 @@ AMDGPUPerfHintAnalysis::FuncInfo *AMDGPUPerfHint::visit(const Function &F) { ++FI.InstCost; } } + + if (!FI.HasDenseGlobalMemAcc) { + unsigned GlobalMemAccPercentage = UsedGlobalLoadsInBB * 100 / B.size(); + if (GlobalMemAccPercentage > 50) { + LLVM_DEBUG(dbgs() << "[HasDenseGlobalMemAcc] Set to true since " + << B.getName() << " has " << GlobalMemAccPercentage + << "% global memory access\n"); + FI.HasDenseGlobalMemAcc = true; + } + } } return &FI; @@ -286,6 +320,11 @@ bool AMDGPUPerfHint::runOnFunction(Function &F) { } bool AMDGPUPerfHint::isMemBound(const AMDGPUPerfHintAnalysis::FuncInfo &FI) { + // Reverting optimal scheduling in favour of occupancy with basic block(s) + // having dense global memory access can potentially hurt performance. + if (FI.HasDenseGlobalMemAcc) + return true; + return FI.MemInstCost * 100 / FI.InstCost > MemBoundThresh; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h index 31ff80f5f431..2db8db6957ce 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h @@ -41,7 +41,11 @@ public: unsigned InstCost; unsigned IAMInstCost; // Indirect access memory instruction count unsigned LSMInstCost; // Large stride memory instruction count - FuncInfo() : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0) {} + bool HasDenseGlobalMemAcc; // Set if at least 1 basic block has relatively + // high global memory access + FuncInfo() + : MemInstCost(0), InstCost(0), IAMInstCost(0), LSMInstCost(0), + HasDenseGlobalMemAcc(false) {} }; typedef ValueMap<const Function*, FuncInfo> FuncInfoMap; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index 0df6f4d45b06..bd8e568213b7 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -153,7 +153,10 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3( if (!isVgprRegBank(Dst)) return false; - if (MRI.getType(Dst).isVector()) + // med3 for i16 is only available on gfx9+, and not available for v2i16. + LLT Ty = MRI.getType(Dst); + if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) && + Ty != LLT::scalar(32)) return false; MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode()); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0830cbd919a0..887341e67454 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4426,7 +4426,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mfma_i32_16x16x32_i8: case Intrinsic::amdgcn_mfma_i32_32x32x16_i8: case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32: - case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: { + case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8: + case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: { // Default for MAI intrinsics. // srcC can also be an immediate which can be folded later. 
// FIXME: Should we eventually add an alternative mapping with AGPR src @@ -4451,7 +4459,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: - case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: { + case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: + case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: { // vdst, srcA, srcB, srcC, idx OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp index 4d7a3f4028e8..aa51c5d20bdc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -141,7 +141,7 @@ class ReplaceLDSUseImpl { std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() { // Collect LDS which requires module lowering. std::vector<GlobalVariable *> LDSGlobals = - llvm::AMDGPU::findVariablesToLower(M); + llvm::AMDGPU::findVariablesToLower(M, nullptr); // Remove LDS which don't qualify for replacement. 
llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index 8297635d7bb2..5d7bade00a3e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -340,12 +340,28 @@ def : SourceOfDivergence<int_amdgcn_mfma_i32_16x16x32_i8>; def : SourceOfDivergence<int_amdgcn_mfma_i32_32x32x16_i8>; def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x8_xf32>; def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x4_xf32>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_16x16x32_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_f16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_f16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x32_bf16>; def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x16_bf16>; def : SourceOfDivergence<int_amdgcn_smfmac_i32_16x16x64_i8>; def : SourceOfDivergence<int_amdgcn_smfmac_i32_32x32x32_i8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>; +def : 
SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>; +def : SourceOfDivergence<int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>; // The dummy boolean output is divergent from the IR's perspective, // but the mask results are uniform. These produce a divergent and diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6bd906439ee8..cf4826d81b4b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -736,13 +736,18 @@ static unsigned getMaxNumPreloadedSGPRs() { 2 + // dispatch ID 2 + // flat scratch init 2; // Implicit buffer ptr + // Max number of system SGPRs unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX 1 + // WorkGroupIDY 1 + // WorkGroupIDZ 1 + // WorkGroupInfo 1; // private segment wave byte offset - return MaxUserSGPRs + MaxSystemSGPRs; + + // Max number of synthetic SGPRs + unsigned SyntheticSGPRs = 1; // LDSKernelId + + return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs; } unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const { @@ -852,34 +857,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { return MI && TII->isVALU(*MI); } - bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const { - if (Pred->NodeNum < Succ->NodeNum) - return true; - - SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred}); - - for (unsigned I = 0; I < Succs.size(); ++I) { - for (const SDep &SI : Succs[I]->Succs) { - const SUnit *SU = SI.getSUnit(); - if (SU != Succs[I] && !llvm::is_contained(Succs, SU)) - Succs.push_back(SU); - } - } - - SmallPtrSet<const SUnit*, 32> Visited; - while (!Preds.empty()) { - const SUnit *SU = Preds.pop_back_val(); - if (llvm::is_contained(Succs, SU)) - return false; - Visited.insert(SU); - for (const SDep &SI : SU->Preds) - if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit())) - Preds.push_back(SI.getSUnit()); - } - - return true; - } - // 
Link as many SALU instructions in chain as possible. Return the size // of the chain. Links up to MaxChain instructions. unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain, @@ -895,18 +872,20 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From); dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n'); - if (SU->addPred(SDep(From, SDep::Artificial), false)) - ++Linked; + if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From)) + if (DAG->addEdge(SU, SDep(From, SDep::Artificial))) + ++Linked; for (SDep &SI : From->Succs) { SUnit *SUv = SI.getSUnit(); - if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU)) - SUv->addPred(SDep(SU, SDep::Artificial), false); + if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) && + DAG->canAddEdge(SUv, SU)) + DAG->addEdge(SUv, SDep(SU, SDep::Artificial)); } for (SDep &SI : SU->Succs) { SUnit *Succ = SI.getSUnit(); - if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ)) + if (Succ != SU && isSALU(Succ)) Worklist.push_back(Succ); } } @@ -949,7 +928,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation { if (Visited.count(&*LastSALU)) continue; - if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU)) + if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) || + !DAG->canAddEdge(&*LastSALU, &SU)) continue; Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 971e44723758..dca926867300 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1584,6 +1584,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo( parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.PrivateSegmentSize, 0, 0) || + 
parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId, + AMDGPU::SGPR_32RegClass, + MFI->ArgInfo.LDSKernelId, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, 0, 1) || diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index e12d0ffef35c..2a9393fc1595 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1739,6 +1739,8 @@ public: void cvtVOP3(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); void cvtVOPD(MCInst &Inst, const OperandVector &Operands); + void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, OptionalImmIndexMap &OptionalIdx); @@ -1767,21 +1769,11 @@ public: void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } - void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPCNoDstDPP(Inst, Operands, true); - } void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { cvtVOP3DPP(Inst, Operands, true); } - void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands, - bool IsDPP8 = false); - void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) { - cvtVOPC64NoDstDPP(Inst, Operands, true); - } OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, AMDGPUOperand::ImmTy Type); @@ -4177,7 +4169,9 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { return false; } - if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { + 
uint64_t TSFlags = MII.get(Opc).TSFlags; + + if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); if (OpSelIdx != -1) { if (Inst.getOperand(OpSelIdx).getImm() != 0) @@ -4190,6 +4184,15 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { } } + // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). + if ((TSFlags & SIInstrFlags::IsDOT) && (TSFlags & SIInstrFlags::VOP3) && + !(TSFlags & SIInstrFlags::VOP3P)) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + if (OpSel & 3) + return false; + } + return true; } @@ -4636,9 +4639,6 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, Error(IDLoc, "ABS not allowed in VOP3B instructions"); return false; } - if (!validateCoherencyBits(Inst, Operands, IDLoc)) { - return false; - } if (!validateExeczVcczOperands(Operands)) { return false; } @@ -5004,6 +5004,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { PARSE_BITS_ENTRY(KD.kernel_code_properties, KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Val, ValRange); + } else if (ID == ".amdhsa_uses_dynamic_stack") { + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { if (hasArchitectedFlatScratch()) return Error(IDRange.Start, @@ -8024,10 +8027,13 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { - cvtVOP3P(Inst, Operands); - +// Determines which bit DST_OP_SEL occupies in the op_sel operand according to +// the number of src operands present, then copies that bit into src0_modifiers. 
+void cvtVOP3DstOpSelOnly(MCInst &Inst) { int Opc = Inst.getOpcode(); + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + if (OpSelIdx == -1) + return; int SrcNum; const int Ops[] = { AMDGPU::OpName::src0, @@ -8038,7 +8044,6 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) ++SrcNum); assert(SrcNum > 0); - int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); if ((OpSel & (1 << SrcNum)) != 0) { @@ -8048,6 +8053,18 @@ void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) } } +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, + const OperandVector &Operands) { + cvtVOP3P(Inst, Operands); + cvtVOP3DstOpSelOnly(Inst); +} + +void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + cvtVOP3P(Inst, Operands, OptionalIdx); + cvtVOP3DstOpSelOnly(Inst); +} + static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. 
This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -8241,6 +8258,12 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; + if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || + Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { + Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods + Inst.addOperand(Inst.getOperand(0)); + } + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); @@ -8747,14 +8770,6 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtVOP3DPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap OptionalIdx; unsigned Opc = Inst.getOpcode(); @@ -8802,6 +8817,8 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo } if (Desc.TSFlags & SIInstrFlags::VOP3P) cvtVOP3P(Inst, Operands, OptionalIdx); + else if (Desc.TSFlags & SIInstrFlags::VOP3) + cvtVOP3OpSel(Inst, Operands, OptionalIdx); else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); } @@ -8821,14 +8838,6 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bo } } -// Add dummy $old operand -void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, - const OperandVector &Operands, - bool IsDPP8) { - Inst.addOperand(MCOperand::createReg(0)); - cvtDPP(Inst, Operands, IsDPP8); -} - void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { OptionalImmIndexMap 
OptionalIdx; @@ -9043,12 +9052,27 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI, 0); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI, 0); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::dst_sel) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); + } + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), + AMDGPU::OpName::dst_unused) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTySdwaDstUnused, + DstUnused::UNUSED_PRESERVE); } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index ccaf646008b1..98ee720200b4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -451,7 +451,11 @@ DecodeStatus 
AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P) convertVOP3PDPPInst(MI); else if (AMDGPU::isVOPC64DPP(MI.getOpcode())) - convertVOPCDPPInst(MI); + convertVOPCDPPInst(MI); // Special VOP3 case + else { + assert(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3); + convertVOP3DPPInst(MI); // Regular VOP3 case + } break; } Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address); @@ -745,6 +749,43 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { return MCDisassembler::Success; } +struct VOPModifiers { + unsigned OpSel = 0; + unsigned OpSelHi = 0; + unsigned NegLo = 0; + unsigned NegHi = 0; +}; + +// Reconstruct values of VOP3/VOP3P operands such as op_sel. +// Note that these values do not affect disassembler output, +// so this is only necessary for consistency with src_modifiers. +static VOPModifiers collectVOPModifiers(const MCInst &MI, + bool IsVOP3P = false) { + VOPModifiers Modifiers; + unsigned Opc = MI.getOpcode(); + const int ModOps[] = {AMDGPU::OpName::src0_modifiers, + AMDGPU::OpName::src1_modifiers, + AMDGPU::OpName::src2_modifiers}; + for (int J = 0; J < 3; ++J) { + int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); + if (OpIdx == -1) + continue; + + unsigned Val = MI.getOperand(OpIdx).getImm(); + + Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; + if (IsVOP3P) { + Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; + Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J; + Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J; + } else if (J == 0) { + Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3; + } + } + + return Modifiers; +} + // We must check FI == literal to reject not genuine dpp8 insts, and we must // first add optional MI operands to check FI DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { @@ -755,6 +796,11 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { } else if 
((MCII->get(Opc).TSFlags & SIInstrFlags::VOPC) || AMDGPU::isVOPC64DPP(Opc)) { convertVOPCDPPInst(MI); + } else if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + auto Mods = collectVOPModifiers(MI); + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), + AMDGPU::OpName::op_sel); } else { // Insert dummy unused src modifiers. if (MI.getNumOperands() < DescNumOps && @@ -770,6 +816,18 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { return isValidDPP8(MI) ? MCDisassembler::Success : MCDisassembler::SoftFail; } +DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { + unsigned Opc = MI.getOpcode(); + unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + if (MI.getNumOperands() < DescNumOps && + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + auto Mods = collectVOPModifiers(MI); + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), + AMDGPU::OpName::op_sel); + } + return MCDisassembler::Success; +} + // Note that before gfx10, the MIMG encoding provided no information about // VADDR size. Consequently, decoded instructions always show address as if it // has 1 dword, which could be not really so. 
@@ -914,45 +972,27 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); + auto Mods = collectVOPModifiers(MI, true); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in); - const int ModOps[] = {AMDGPU::OpName::src0_modifiers, - AMDGPU::OpName::src1_modifiers, - AMDGPU::OpName::src2_modifiers}; - unsigned OpSel = 0; - unsigned OpSelHi = 0; - unsigned NegLo = 0; - unsigned NegHi = 0; - for (int J = 0; J < 3; ++J) { - int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - if (OpIdx == -1) - break; - unsigned Val = MI.getOperand(OpIdx).getImm(); - - OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J; - OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J; - NegLo |= !!(Val & SISrcMods::NEG) << J; - NegHi |= !!(Val & SISrcMods::NEG_HI) << J; - } - if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(OpSel), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(OpSelHi), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi), AMDGPU::OpName::op_sel_hi); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(NegLo), + insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo), AMDGPU::OpName::neg_lo); if (MI.getNumOperands() < DescNumOps && AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1) - insertNamedMCOperand(MI, MCOperand::createImm(NegHi), + insertNamedMCOperand(MI, 
MCOperand::createImm(Mods.NegHi), AMDGPU::OpName::neg_hi); return MCDisassembler::Success; @@ -2000,6 +2040,9 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective( KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); } + PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack", + KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) return MCDisassembler::Fail; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 31869f0917ae..d17e2d8d5082 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -162,6 +162,7 @@ public: DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; + DecodeStatus convertVOP3DPPInst(MCInst &MI) const; DecodeStatus convertVOP3PDPPInst(MCInst &MI) const; DecodeStatus convertVOPCDPPInst(MCInst &MI) const; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 5d254518c67a..4558ddf6dbfe 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -202,6 +202,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n"); return nullptr; } + int OrigOpE32 = AMDGPU::getVOPe32(OrigOp); + // Prior checks cover Mask with VOPC condition, but not on purpose + auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask); + assert(RowMaskOpnd && RowMaskOpnd->isImm()); + auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask); + assert(BankMaskOpnd && BankMaskOpnd->isImm()); + const bool MaskAllLanes = + RowMaskOpnd->getImm() == 0xF && 
BankMaskOpnd->getImm() == 0xF; + (void)MaskAllLanes; + assert(MaskAllLanes || + !(TII->isVOPC(DPPOp) || + (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) && + "VOPC cannot form DPP unless mask is full"); auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, OrigMI.getDebugLoc(), TII->get(DPPOp)) @@ -234,6 +247,10 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef, CombOldVGPR.SubReg); ++NumOperands; + } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && + TII->isVOPC(OrigOpE32))) { + // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand + // because they write to SGPRs not VGPRs } else { // TODO: this discards MAC/FMA instructions for now, let's add it later LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction," diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td index 281474994bca..6ff349e31f22 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -249,11 +249,11 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel, //===----------------------------------------------------------------------===// def : ProcessorModel<"gfx1100", GFX11SpeedModel, - FeatureISAVersion11_0.Features + FeatureISAVersion11_0_0.Features >; def : ProcessorModel<"gfx1101", GFX11SpeedModel, - FeatureISAVersion11_0.Features + FeatureISAVersion11_0_1.Features >; def : ProcessorModel<"gfx1102", GFX11SpeedModel, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 100410bb7644..04da14cc4916 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -733,7 +733,7 @@ void 
GCNScheduleDAGMILive::collectRematerializableInstructions() { MachineOperand *Op = MRI.getOneDef(Reg); MachineInstr *Def = Op->getParent(); - if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def, AA)) + if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def)) continue; MachineInstr *UseI = &*MRI.use_instr_nodbg_begin(Reg); @@ -943,9 +943,8 @@ bool GCNScheduleDAGMILive::sinkTriviallyRematInsts(const GCNSubtarget &ST, } // Copied from MachineLICM -bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) { - if (!TII->isTriviallyReMaterializable(MI, AA)) +bool GCNScheduleDAGMILive::isTriviallyReMaterializable(const MachineInstr &MI) { + if (!TII->isTriviallyReMaterializable(MI)) return false; for (const MachineOperand &MO : MI.operands()) diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 97f94f69b70e..c3db849cf81a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -142,7 +142,7 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // and single use outside the defining block into RematerializableInsts. 
void collectRematerializableInstructions(); - bool isTriviallyReMaterializable(const MachineInstr &MI, AAResults *AA); + bool isTriviallyReMaterializable(const MachineInstr &MI); // TODO: Should also attempt to reduce RP of SGPRs and AGPRs // Attempt to reduce RP of VGPR by sinking trivially rematerializable diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h index d269d0945f3b..d71f80c5f458 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -145,6 +145,7 @@ protected: bool HasDot7Insts = false; bool HasDot8Insts = false; bool HasMAIInsts = false; + bool HasFP8Insts = false; bool HasPkFmacF16Inst = false; bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; @@ -721,6 +722,10 @@ public: return HasMAIInsts; } + bool hasFP8Insts() const { + return HasFP8Insts; + } + bool hasPkFmacF16Inst() const { return HasPkFmacF16Inst; } @@ -930,7 +935,7 @@ public: } bool hasUserSGPRInit16Bug() const { - return UserSGPRInit16Bug; + return UserSGPRInit16Bug && isWave32(); } bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index bd938d829953..21ff2744e5b4 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -627,7 +627,7 @@ void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo, bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc, unsigned OpNo) const { - return OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && + return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) && (Desc.TSFlags & SIInstrFlags::VOPC) && 
(Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)); @@ -644,8 +644,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // If there are printed modifiers, printOperandAndFPInputMods or // printOperandAndIntInputMods will be called instead if ((OpNo == 0 || - (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP)) || - (OpNo == 2 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && + (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) && (Desc.TSFlags & SIInstrFlags::VOPC) && (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) || Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO))) diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 078133469549..0e71509cf2bd 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -367,6 +367,8 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor( PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK); PRINT_FIELD(OS, (hasArchitectedFlatScratch(STI) ? 
".amdhsa_enable_private_segment" diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index f54778535b7c..3e95c55df57e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -67,6 +67,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" @@ -81,9 +82,9 @@ static cl::opt<bool> EnableM0Merge( cl::init(true)); namespace { - class SIFixSGPRCopies : public MachineFunctionPass { MachineDominatorTree *MDT; + unsigned NextVGPRToSGPRCopyID; public: static char ID; @@ -92,9 +93,16 @@ public: const SIRegisterInfo *TRI; const SIInstrInfo *TII; - SIFixSGPRCopies() : MachineFunctionPass(ID) {} + SIFixSGPRCopies() : MachineFunctionPass(ID), NextVGPRToSGPRCopyID(0) {} bool runOnMachineFunction(MachineFunction &MF) override; + unsigned getNextVGPRToSGPRCopyId() { return ++NextVGPRToSGPRCopyID; } + void lowerVGPR2SGPRCopies(MachineFunction &MF); + // Handles copies which source register is: + // 1. Physical register + // 2. AGPR + // 3. Defined by the instruction the merely moves the immediate + bool lowerSpecialCase(MachineInstr &MI); MachineBasicBlock *processPHINode(MachineInstr &MI); @@ -569,6 +577,14 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); MDT = &getAnalysis<MachineDominatorTree>(); + // We have to lower VGPR to SGPR copies before the main loop + // because the REG_SEQUENCE and PHI lowering in main loop + // convert the def-use chains to VALU and close the opportunities + // for keeping them scalar. + // TODO: REG_SEQENCE and PHIs are semantically copies. The next patch + // addresses their lowering and unify the processing in one main loop. 
+ lowerVGPR2SGPRCopies(MF); + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { MachineBasicBlock *MBB = &*BI; @@ -640,42 +656,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { continue; } - if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - Register SrcReg = MI.getOperand(1).getReg(); - if (!SrcReg.isVirtual()) { - MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT); - if (NewBB && NewBB != MBB) { - MBB = NewBB; - E = MBB->end(); - BI = MachineFunction::iterator(MBB); - BE = MF.end(); - } - assert((!NewBB || NewBB == I->getParent()) && - "moveToVALU did not return the right basic block"); - break; - } - - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - unsigned SMovOp; - int64_t Imm; - // If we are just copying an immediate, we can replace the copy with - // s_mov_b32. - if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) { - MI.getOperand(1).ChangeToImmediate(Imm); - MI.addImplicitDefUseOperands(MF); - MI.setDesc(TII->get(SMovOp)); - break; - } - MachineBasicBlock *NewBB = TII->moveToVALU(MI, MDT); - if (NewBB && NewBB != MBB) { - MBB = NewBB; - E = MBB->end(); - BI = MachineFunction::iterator(MBB); - BE = MF.end(); - } - assert((!NewBB || NewBB == I->getParent()) && - "moveToVALU did not return the right basic block"); - } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { + if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) { tryChangeVGPRtoSGPRinCopy(MI, TRI, TII); } @@ -916,3 +897,269 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) { } return CreatedBB; } + +bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI) { + MachineBasicBlock *MBB = MI.getParent(); + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI); + + // We return true to indicate that no further processing needed + if (!isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) + return true; + + Register SrcReg = MI.getOperand(1).getReg(); + if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) { 
+ TII->moveToVALU(MI, MDT); + return true; + } + + unsigned SMovOp; + int64_t Imm; + // If we are just copying an immediate, we can replace the copy with + // s_mov_b32. + if (isSafeToFoldImmIntoCopy(&MI, MRI->getVRegDef(SrcReg), TII, SMovOp, Imm)) { + MI.getOperand(1).ChangeToImmediate(Imm); + MI.addImplicitDefUseOperands(*MBB->getParent()); + MI.setDesc(TII->get(SMovOp)); + return true; + } + return false; +} + +class V2SCopyInfo { +public: + // VGPR to SGPR copy being processed + MachineInstr *Copy; + // All SALU instructions reachable from this copy in SSA graph + DenseSet<MachineInstr *> SChain; + // Number of SGPR to VGPR copies that are used to put the SALU computation + // results back to VALU. + unsigned NumSVCopies; + + unsigned Score; + // Actual count of v_readfirstlane_b32 + // which need to be inserted to keep SChain SALU + unsigned NumReadfirstlanes; + // Current score state. To speedup selection V2SCopyInfos for processing + bool NeedToBeConvertedToVALU = false; + // Unique ID. Used as a key for mapping to keep permanent order. 
+ unsigned ID; + + // Count of another VGPR to SGPR copies that contribute to the + // current copy SChain + unsigned SiblingPenalty = 0; + SetVector<unsigned> Siblings; + V2SCopyInfo() : Copy(nullptr), ID(0){}; + V2SCopyInfo(unsigned Id, MachineInstr *C, unsigned Width) + : Copy(C), NumSVCopies(0), NumReadfirstlanes(Width / 32), ID(Id){}; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() { + dbgs() << ID << " : " << *Copy << "\n\tS:" << SChain.size() + << "\n\tSV:" << NumSVCopies << "\n\tSP: " << SiblingPenalty + << "\nScore: " << Score << "\n"; + } +#endif +}; + +void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) { + + DenseMap<unsigned, V2SCopyInfo> Copies; + DenseMap<MachineInstr *, SetVector<unsigned>> SiblingPenalty; + + // The main function that computes the VGPR to SGPR copy score + // and determines copy further lowering way: v_readfirstlane_b32 or moveToVALU + auto needToBeConvertedToVALU = [&](V2SCopyInfo *I) -> bool { + if (I->SChain.empty()) + return true; + I->Siblings = SiblingPenalty[*std::max_element( + I->SChain.begin(), I->SChain.end(), + [&](MachineInstr *A, MachineInstr *B) -> bool { + return SiblingPenalty[A].size() < SiblingPenalty[B].size(); + })]; + I->Siblings.remove_if([&](unsigned ID) { return ID == I->ID; }); + // The loop below computes the number of another VGPR to SGPR copies + // which contribute to the current copy SALU chain. We assume that all the + // copies with the same source virtual register will be squashed to one by + // regalloc. Also we take careof the copies of the differnt subregs of the + // same register. 
+ SmallSet<std::pair<Register, unsigned>, 4> SrcRegs; + for (auto J : I->Siblings) { + auto InfoIt = Copies.find(J); + if (InfoIt != Copies.end()) { + MachineInstr *SiblingCopy = InfoIt->getSecond().Copy; + if (SiblingCopy->isImplicitDef()) + // the COPY has already been MoveToVALUed + continue; + + SrcRegs.insert(std::make_pair(SiblingCopy->getOperand(1).getReg(), + SiblingCopy->getOperand(1).getSubReg())); + } + } + I->SiblingPenalty = SrcRegs.size(); + + unsigned Penalty = + I->NumSVCopies + I->SiblingPenalty + I->NumReadfirstlanes; + unsigned Profit = I->SChain.size(); + I->Score = Penalty > Profit ? 0 : Profit - Penalty; + I->NeedToBeConvertedToVALU = I->Score < 3; + return I->NeedToBeConvertedToVALU; + }; + + auto needProcessing = [](MachineInstr &MI) -> bool { + switch (MI.getOpcode()) { + case AMDGPU::COPY: + case AMDGPU::WQM: + case AMDGPU::STRICT_WQM: + case AMDGPU::SOFT_WQM: + case AMDGPU::STRICT_WWM: + return true; + default: + return false; + } + }; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + MachineBasicBlock *MBB = &*BI; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + MachineInstr &MI = *I; + if (!needProcessing(MI)) + continue; + if (lowerSpecialCase(MI)) + continue; + + // Compute the COPY width to pass it to V2SCopyInfo Ctor + Register DstReg = MI.getOperand(0).getReg(); + + const TargetRegisterClass *DstRC = TRI->getRegClassForReg(*MRI, DstReg); + + V2SCopyInfo In(getNextVGPRToSGPRCopyId(), &MI, + TRI->getRegSizeInBits(*DstRC)); + + SmallVector<MachineInstr *, 8> AnalysisWorklist; + // Needed because the SSA is not a tree but a graph and may have + // forks and joins. We should not then go same way twice. 
+ DenseSet<MachineInstr *> Visited; + AnalysisWorklist.push_back(&MI); + while (!AnalysisWorklist.empty()) { + + MachineInstr *Inst = AnalysisWorklist.pop_back_val(); + + if (!Visited.insert(Inst).second) + continue; + + // Copies and REG_SEQUENCE do not contribute to the final assembly + // So, skip them but take care of the SGPR to VGPR copies bookkeeping. + if (Inst->isCopy() || Inst->isRegSequence()) { + if (TRI->isVGPR(*MRI, Inst->getOperand(0).getReg())) { + if (!Inst->isCopy() || + !tryChangeVGPRtoSGPRinCopy(*Inst, TRI, TII)) { + In.NumSVCopies++; + continue; + } + } + } + + SiblingPenalty[Inst].insert(In.ID); + + SmallVector<MachineInstr *, 4> Users; + if ((TII->isSALU(*Inst) && Inst->isCompare()) || + (Inst->isCopy() && Inst->getOperand(0).getReg() == AMDGPU::SCC)) { + auto I = Inst->getIterator(); + auto E = Inst->getParent()->end(); + while (++I != E && !I->findRegisterDefOperand(AMDGPU::SCC)) { + if (I->readsRegister(AMDGPU::SCC)) + Users.push_back(&*I); + } + } else if (Inst->getNumExplicitDefs() != 0) { + Register Reg = Inst->getOperand(0).getReg(); + if (TRI->isSGPRReg(*MRI, Reg)) + for (auto &U : MRI->use_instructions(Reg)) + Users.push_back(&U); + } + for (auto U : Users) { + if (TII->isSALU(*U)) + In.SChain.insert(U); + AnalysisWorklist.push_back(U); + } + } + Copies[In.ID] = In; + } + } + + SmallVector<unsigned, 8> LoweringWorklist; + for (auto &C : Copies) { + if (needToBeConvertedToVALU(&C.second)) + LoweringWorklist.push_back(C.second.ID); + } + + while (!LoweringWorklist.empty()) { + unsigned CurID = LoweringWorklist.pop_back_val(); + auto CurInfoIt = Copies.find(CurID); + if (CurInfoIt != Copies.end()) { + V2SCopyInfo C = CurInfoIt->getSecond(); + LLVM_DEBUG(dbgs() << "Processing ...\n"; C.dump()); + for (auto S : C.Siblings) { + auto SibInfoIt = Copies.find(S); + if (SibInfoIt != Copies.end()) { + V2SCopyInfo &SI = SibInfoIt->getSecond(); + LLVM_DEBUG(dbgs() << "Sibling:\n"; SI.dump()); + if (!SI.NeedToBeConvertedToVALU) { + 
set_subtract(SI.SChain, C.SChain); + if (needToBeConvertedToVALU(&SI)) + LoweringWorklist.push_back(SI.ID); + } + SI.Siblings.remove_if([&](unsigned ID) { return ID == C.ID; }); + } + } + LLVM_DEBUG(dbgs() << "V2S copy " << *C.Copy + << " is being turned to VALU\n"); + Copies.erase(C.ID); + TII->moveToVALU(*C.Copy, MDT); + } + } + + // Now do actual lowering + for (auto C : Copies) { + MachineInstr *MI = C.second.Copy; + MachineBasicBlock *MBB = MI->getParent(); + // We decide to turn V2S copy to v_readfirstlane_b32 + // remove it from the V2SCopies and remove it from all its siblings + LLVM_DEBUG(dbgs() << "V2S copy " << *MI + << " is being turned to v_readfirstlane_b32" + << " Score: " << C.second.Score << "\n"); + Register DstReg = MI->getOperand(0).getReg(); + Register SrcReg = MI->getOperand(1).getReg(); + unsigned SubReg = MI->getOperand(1).getSubReg(); + const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg); + SrcRC = TRI->getSubRegClass(SrcRC, SubReg); + size_t SrcSize = TRI->getRegSizeInBits(*SrcRC); + if (SrcSize == 16) { + // HACK to handle possible 16bit VGPR source + auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); + MIB.addReg(SrcReg, 0, AMDGPU::NoSubRegister); + } else if (SrcSize == 32) { + auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg); + MIB.addReg(SrcReg, 0, SubReg); + } else { + auto Result = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII->get(AMDGPU::REG_SEQUENCE), DstReg); + int N = TRI->getRegSizeInBits(*SrcRC) / 32; + for (int i = 0; i < N; i++) { + Register PartialSrc = TII->buildExtractSubReg( + Result, *MRI, MI->getOperand(1), SrcRC, + TRI->getSubRegFromChannel(i), &AMDGPU::VGPR_32RegClass); + Register PartialDst = + MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, *Result, Result->getDebugLoc(), + TII->get(AMDGPU::V_READFIRSTLANE_B32), PartialDst) + .addReg(PartialSrc); + 
Result.addReg(PartialDst).addImm(TRI->getSubRegFromChannel(i)); + } + } + MI->eraseFromParent(); + } +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d16da2a8b86b..438e8b200ecc 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1664,6 +1664,17 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG, return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset); } +SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG, + const SDLoc &SL) const { + + Function &F = DAG.getMachineFunction().getFunction(); + Optional<uint32_t> KnownSize = + AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (KnownSize.has_value()) + return DAG.getConstant(KnownSize.value(), SL, MVT::i32); + return SDValue(); +} + SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, bool Signed, @@ -2049,6 +2060,9 @@ void SITargetLowering::allocateSpecialInputSGPRs( if (Info.hasWorkGroupIDZ()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); + + if (Info.hasLDSKernelId()) + allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId); } // Allocate special inputs passed in user SGPRs. @@ -2102,6 +2116,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo, CCInfo.AllocateReg(FlatScratchInitReg); } + if (Info.hasLDSKernelId()) { + Register Reg = Info.addLDSKernelId(); + MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read // these from the dispatch pointer. 
} @@ -2347,8 +2367,8 @@ SDValue SITargetLowering::LowerFormalArguments( (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) && !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && - !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && - !Info->hasWorkItemIDZ()); + !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() && + !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); } if (CallConv == CallingConv::AMDGPU_PS) { @@ -2762,7 +2782,8 @@ void SITargetLowering::passSpecialInputs( {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"}, {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"}, - {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"} + {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}, + {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"}, }; for (auto Attr : ImplicitAttrs) { @@ -2798,6 +2819,13 @@ void SITargetLowering::passSpecialInputs( // The implicit arg ptr is special because it doesn't have a corresponding // input for kernels, and is computed from the kernarg segment pointer. InputReg = getImplicitArgPtr(DAG, DL); + } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) { + Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F); + if (Id.has_value()) { + InputReg = DAG.getConstant(Id.value(), DL, ArgVT); + } else { + InputReg = DAG.getUNDEF(ArgVT); + } } else { // We may have proven the input wasn't needed, although the ABI is // requiring it. We just need to allocate the register appropriately. 
@@ -6887,6 +6915,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_workgroup_id_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_lds_kernel_id: { + if (MFI->isEntryFunction()) + return getLDSKernelId(DAG, DL); + return getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::LDS_KERNEL_ID); + } case Intrinsic::amdgcn_workitem_id_x: return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h index 4fbccf0c5850..d1fecc1afc7f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -48,6 +48,7 @@ private: SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, uint64_t Offset) const; SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; + SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const; SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, uint64_t Offset, Align Alignment, diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td index b398e108bf62..7c1d8d32b624 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -85,7 +85,7 @@ class InstSI <dag outs, dag ins, string asm = "", field bit VOPAsmPrefer32Bit = 0; // This bit indicates that this is a VOP3 opcode which supports op_sel - // modifier (gfx9 only). + // modifier. field bit VOP3_OPSEL = 0; // Is it possible for this instruction to be atomic? 
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 799d34e32d27..8916f06598c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -108,8 +108,8 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); } -bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool SIInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) { // Normally VALU use of exec would block the rematerialization, but that // is OK in this case to have an implicit exec read as all VALU do. @@ -220,16 +220,23 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) return false; - assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); + unsigned NumOps = getNumOperandsNoGlue(Load0); + if (NumOps != getNumOperandsNoGlue(Load1)) + return false; // Check base reg. if (Load0->getOperand(0) != Load1->getOperand(0)) return false; + // Match register offsets, if both register and immediate offsets present. 
+ assert(NumOps == 4 || NumOps == 5); + if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1)) + return false; + const ConstantSDNode *Load0Offset = - dyn_cast<ConstantSDNode>(Load0->getOperand(1)); + dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3)); const ConstantSDNode *Load1Offset = - dyn_cast<ConstantSDNode>(Load1->getOperand(1)); + dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3)); if (!Load0Offset || !Load1Offset) return false; @@ -5011,10 +5018,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, } if (MO->isReg()) { - if (!DefinedRC) { - // This operand allows any register. - return true; - } + if (!DefinedRC) + return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN; if (!isLegalRegOperand(MRI, OpInfo, *MO)) return false; bool IsAGPR = RI.isAGPR(MRI, MO->getReg()); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1b411eb83eb3..5840f45bdc5a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -184,8 +184,7 @@ public: return ST; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; bool isIgnorableUse(const MachineOperand &MO) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 23afd6556bc9..81f8dcc482da 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -324,7 +324,8 @@ class isFloatType<ValueType SrcVT> { // XXX - do v2i16 instructions? 
class isIntType<ValueType SrcVT> { - bit ret = !or(!eq(SrcVT.Value, i16.Value), + bit ret = !or(!eq(SrcVT.Value, i8.Value), + !eq(SrcVT.Value, i16.Value), !eq(SrcVT.Value, i32.Value), !eq(SrcVT.Value, i64.Value), !eq(SrcVT.Value, v4i16.Value), @@ -1411,6 +1412,10 @@ class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass { def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; +def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> { + let Name = "SDWAWithBin32InputMods"; + let ParserMethod = "parseRegOrImm"; +} class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : InputMods <matchClass> { @@ -1419,6 +1424,7 @@ class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>; def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>; +def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>; def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; @@ -1897,94 +1903,94 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> { - dag ret = !if (!eq(NumSrcArgs, 0), + dag ret = !if(!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) (ins ), - !if (!eq(NumSrcArgs, 1), - !if (HasModifiers, - // VOP1_DPP with modifiers - (ins OldRC:$old, Src0Mod:$src0_modifiers, - Src0RC:$src0) - /* else */, - // VOP1_DPP without modifiers - (ins OldRC:$old, Src0RC:$src0) - /* endif */), - !if (!eq(NumSrcArgs, 2), - !if (HasModifiers, - // VOP2_DPP with modifiers - (ins OldRC:$old, - Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1) - /* else 
*/, - // VOP2_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1) - ) - /* NumSrcArgs == 3, VOP3 */, - !if (HasModifiers, - // VOP3_DPP with modifiers - (ins OldRC:$old, - Src0Mod:$src0_modifiers, Src0RC:$src0, - Src1Mod:$src1_modifiers, Src1RC:$src1, - Src2Mod:$src2_modifiers, Src2RC:$src2) - /* else */, - // VOP3_DPP without modifiers - (ins OldRC:$old, - Src0RC:$src0, Src1RC:$src1, - Src2RC:$src2) + !con( + !if(HasOld ,(ins OldRC:$old), (ins)), + !if (!eq(NumSrcArgs, 1), + !if (HasModifiers, + // VOP1_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0) + /* else */, + // VOP1_DPP without modifiers + (ins Src0RC:$src0) + /* endif */), + !if (!eq(NumSrcArgs, 2), + !if (HasModifiers, + // VOP2_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1) + /* else */, + // VOP2_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1) + ) + /* NumSrcArgs == 3, VOP3 */, + !if (HasModifiers, + // VOP3_DPP with modifiers + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2) + /* else */, + // VOP3_DPP without modifiers + (ins Src0RC:$src0, Src1RC:$src1, + Src2RC:$src2) + ) + ) + ) ) - /* endif */))); + ); } class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand 
Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins FI:$fi)); } class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, + HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, (ins dpp8:$dpp8, FI:$fi)); } -class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { +class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> { dag old = ( ins OldRC:$old ); dag base = VOP3Base; dag ret = !con( - !if(!ne(NumSrcArgs, 0), old, (ins)), + !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), base ); } -class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } -class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins FI:$fi)); } -class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { - dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, +class getInsVOP3DPP8<dag VOP3Base, 
RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, (ins dpp8:$dpp8, FI:$fi)); } @@ -2665,6 +2671,8 @@ def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; +def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>; +def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>; def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; @@ -2672,6 +2680,8 @@ def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; +def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>; +def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>; class Commutable_REV <string revOp, bit isOrig> { string RevOp = revOp; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 0504c59ebd9e..9176e85568ee 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -44,6 +44,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDY(false), WorkGroupIDZ(false), WorkGroupInfo(false), + LDSKernelId(false), PrivateSegmentWaveByteOffset(false), WorkItemIDX(false), WorkItemIDY(false), @@ -143,6 +144,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const 
MachineFunction &MF) if (!F.hasFnAttribute("amdgpu-no-dispatch-id")) DispatchID = true; + + if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id")) + LDSKernelId = true; } // FIXME: This attribute is a hack, we just need an analysis on the function @@ -261,6 +265,12 @@ Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) return ArgInfo.ImplicitBufferPtr.getRegister(); } +Register SIMachineFunctionInfo::addLDSKernelId() { + ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR()); + NumUserSGPRs += 1; + return ArgInfo.LDSKernelId.getRegister(); +} + bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { for (unsigned I = 0; CSRegs[I]; ++I) { @@ -561,6 +571,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); + Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId); Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index bebb13cbf09f..5105587617fd 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -191,6 +191,7 @@ struct SIArgumentInfo { Optional<SIArgument> WorkGroupIDY; Optional<SIArgument> WorkGroupIDZ; Optional<SIArgument> WorkGroupInfo; + Optional<SIArgument> LDSKernelId; Optional<SIArgument> PrivateSegmentWaveByteOffset; Optional<SIArgument> ImplicitArgPtr; @@ -215,6 +216,7 @@ template <> struct MappingTraits<SIArgumentInfo> { YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY); 
YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ); YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo); + YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId); YamlIO.mapOptional("privateSegmentWaveByteOffset", AI.PrivateSegmentWaveByteOffset); @@ -418,6 +420,7 @@ private: bool WorkGroupIDY : 1; bool WorkGroupIDZ : 1; bool WorkGroupInfo : 1; + bool LDSKernelId : 1; bool PrivateSegmentWaveByteOffset : 1; bool WorkItemIDX : 1; // Always initialized. @@ -608,6 +611,7 @@ public: Register addDispatchID(const SIRegisterInfo &TRI); Register addFlatScratchInit(const SIRegisterInfo &TRI); Register addImplicitBufferPtr(const SIRegisterInfo &TRI); + Register addLDSKernelId(); /// Increment user SGPRs used for padding the argument list only. Register addReservedUserSGPR() { @@ -705,6 +709,8 @@ public: return WorkGroupInfo; } + bool hasLDSKernelId() const { return LDSKernelId; } + bool hasPrivateSegmentWaveByteOffset() const { return PrivateSegmentWaveByteOffset; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 66bc46aaefea..19a83ad53e2e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -12,6 +12,8 @@ #include "SIRegisterInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -26,6 +28,10 @@ class SIOptimizeExecMasking : public MachineFunctionPass { const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; const MachineRegisterInfo *MRI = nullptr; + MCRegister Exec; + + DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; + SmallVector<std::pair<MachineInstr *, MachineInstr *>, 1> OrXors; Register isCopyFromExec(const MachineInstr &MI) const; Register 
isCopyToExec(const MachineInstr &MI) const; @@ -44,13 +50,13 @@ class SIOptimizeExecMasking : public MachineFunctionPass { std::function<bool(MachineInstr *)> Pred, ArrayRef<MCRegister> NonModifiableRegs, unsigned MaxInstructions = 20) const; - MachineInstr *findPossibleVCMPVCMPXOptimization(MachineInstr &SaveExec, - MCRegister Exec) const; - bool optimizeExecSequence() const; - bool optimizeVCmpxAndSaveexecSequence() const; - bool optimizeSingleVCMPSaveExecSequence(MachineInstr &SaveExecInstr, - MachineInstr &VCmp, - MCRegister Exec) const; + bool optimizeExecSequence(); + void tryRecordVCmpxAndSaveexecSequence(MachineInstr &MI); + bool optimizeVCMPSaveExecSequence(MachineInstr &SaveExecInstr, + MachineInstr &VCmp, MCRegister Exec) const; + + void tryRecordOrSaveexecXorSequence(MachineInstr &MI); + bool optimizeOrSaveexecXorSequences(); public: static char ID; @@ -92,7 +98,7 @@ Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const { case AMDGPU::S_MOV_B32: case AMDGPU::S_MOV_B32_term: { const MachineOperand &Src = MI.getOperand(1); - if (Src.isReg() && Src.getReg() == TRI->getExec()) + if (Src.isReg() && Src.getReg() == Exec) return MI.getOperand(0).getReg(); } } @@ -107,8 +113,7 @@ Register SIOptimizeExecMasking::isCopyToExec(const MachineInstr &MI) const { case AMDGPU::S_MOV_B64: case AMDGPU::S_MOV_B32: { const MachineOperand &Dst = MI.getOperand(0); - if (Dst.isReg() && Dst.getReg() == TRI->getExec() && - MI.getOperand(1).isReg()) + if (Dst.isReg() && Dst.getReg() == Exec && MI.getOperand(1).isReg()) return MI.getOperand(1).getReg(); break; } @@ -394,9 +399,7 @@ bool SIOptimizeExecMasking::isRegisterInUseAfter(MachineInstr &Stop, // => // x = s_<op>_saveexec_b64 y // -bool SIOptimizeExecMasking::optimizeExecSequence() const { - MCRegister Exec = TRI->getExec(); - +bool SIOptimizeExecMasking::optimizeExecSequence() { bool Changed = false; for (MachineBasicBlock &MBB : *MF) { MachineBasicBlock::reverse_iterator I = fixTerminators(MBB); @@ 
-551,88 +554,9 @@ bool SIOptimizeExecMasking::optimizeExecSequence() const { return Changed; } -// Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec sequence -// by looking at an instance of a s_and_saveexec instruction. Returns a pointer -// to the v_cmp instruction if it is safe to replace the sequence (see the -// conditions in the function body). This is after register allocation, so some -// checks on operand dependencies need to be considered. -MachineInstr *SIOptimizeExecMasking::findPossibleVCMPVCMPXOptimization( - MachineInstr &SaveExec, MCRegister Exec) const { - - MachineInstr *VCmp = nullptr; - - Register SaveExecDest = SaveExec.getOperand(0).getReg(); - if (!TRI->isSGPRReg(*MRI, SaveExecDest)) - return nullptr; - - MachineOperand *SaveExecSrc0 = - TII->getNamedOperand(SaveExec, AMDGPU::OpName::src0); - if (!SaveExecSrc0->isReg()) - return nullptr; - - // Try to find the last v_cmp instruction that defs the saveexec input - // operand without any write to Exec or the saveexec input operand inbetween. - VCmp = findInstrBackwards( - SaveExec, - [&](MachineInstr *Check) { - return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && - Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); - }, - {Exec, SaveExecSrc0->getReg()}); - - if (!VCmp) - return nullptr; - - MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); - assert(VCmpDest && "Should have an sdst operand!"); - - // Check if any of the v_cmp source operands is written by the saveexec. 
- MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); - if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && - SaveExec.modifiesRegister(Src0->getReg(), TRI)) - return nullptr; - - MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); - if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && - SaveExec.modifiesRegister(Src1->getReg(), TRI)) - return nullptr; - - // Don't do the transformation if the destination operand is included in - // it's MBB Live-outs, meaning it's used in any of it's successors, leading - // to incorrect code if the v_cmp and therefore the def of - // the dest operand is removed. - if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) - return nullptr; - - // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the - // s_and_saveexec, skip the optimization. - if (isRegisterInUseBetween(*VCmp, SaveExec, VCmpDest->getReg(), false, - true) || - isRegisterInUseAfter(SaveExec, VCmpDest->getReg())) - return nullptr; - - // Try to determine if there is a write to any of the VCmp - // operands between the saveexec and the vcmp. - // If yes, additional VGPR spilling might need to be inserted. In this case, - // it's not worth replacing the instruction sequence. - SmallVector<MCRegister, 2> NonDefRegs; - if (Src0->isReg()) - NonDefRegs.push_back(Src0->getReg()); - - if (Src1->isReg()) - NonDefRegs.push_back(Src1->getReg()); - - if (!findInstrBackwards( - SaveExec, [&](MachineInstr *Check) { return Check == VCmp; }, - NonDefRegs)) - return nullptr; - - return VCmp; -} - // Inserts the optimized s_mov_b32 / v_cmpx sequence based on the // operands extracted from a v_cmp ..., s_and_saveexec pattern. 
-bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence( +bool SIOptimizeExecMasking::optimizeVCMPSaveExecSequence( MachineInstr &SaveExecInstr, MachineInstr &VCmp, MCRegister Exec) const { const int NewOpcode = AMDGPU::getVCMPXOpFromVCMP(VCmp.getOpcode()); @@ -678,50 +602,164 @@ bool SIOptimizeExecMasking::optimizeSingleVCMPSaveExecSequence( if (Src1->isReg()) MRI->clearKillFlags(Src1->getReg()); + SaveExecInstr.eraseFromParent(); + VCmp.eraseFromParent(); + return true; } -// After all s_op_saveexec instructions are inserted, -// replace (on GFX10.3 and later) +// Record (on GFX10.3 and later) occurences of // v_cmp_* SGPR, IMM, VGPR // s_and_saveexec_b32 EXEC_SGPR_DEST, SGPR -// with +// to be replaced with // s_mov_b32 EXEC_SGPR_DEST, exec_lo // v_cmpx_* IMM, VGPR // to reduce pipeline stalls. -bool SIOptimizeExecMasking::optimizeVCmpxAndSaveexecSequence() const { +void SIOptimizeExecMasking::tryRecordVCmpxAndSaveexecSequence( + MachineInstr &MI) { if (!ST->hasGFX10_3Insts()) - return false; + return; - bool Changed = false; - - DenseMap<MachineInstr *, MachineInstr *> SaveExecVCmpMapping; - MCRegister Exec = TRI->getExec(); const unsigned AndSaveExecOpcode = ST->isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64; - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB) { - // Record relevant v_cmp / s_and_saveexec instruction pairs for - // replacement. 
- if (MI.getOpcode() != AndSaveExecOpcode) - continue; + if (MI.getOpcode() != AndSaveExecOpcode) + return; + + Register SaveExecDest = MI.getOperand(0).getReg(); + if (!TRI->isSGPRReg(*MRI, SaveExecDest)) + return; - if (MachineInstr *VCmp = findPossibleVCMPVCMPXOptimization(MI, Exec)) - SaveExecVCmpMapping[&MI] = VCmp; + MachineOperand *SaveExecSrc0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + if (!SaveExecSrc0->isReg()) + return; + + // Tries to find a possibility to optimize a v_cmp ..., s_and_saveexec + // sequence by looking at an instance of a s_and_saveexec instruction. Returns + // a pointer to the v_cmp instruction if it is safe to replace the sequence + // (see the conditions in the function body). This is after register + // allocation, so some checks on operand dependencies need to be considered. + MachineInstr *VCmp = nullptr; + + // Try to find the last v_cmp instruction that defs the saveexec input + // operand without any write to Exec or the saveexec input operand inbetween. + VCmp = findInstrBackwards( + MI, + [&](MachineInstr *Check) { + return AMDGPU::getVCMPXOpFromVCMP(Check->getOpcode()) != -1 && + Check->modifiesRegister(SaveExecSrc0->getReg(), TRI); + }, + {Exec, SaveExecSrc0->getReg()}); + + if (!VCmp) + return; + + MachineOperand *VCmpDest = TII->getNamedOperand(*VCmp, AMDGPU::OpName::sdst); + assert(VCmpDest && "Should have an sdst operand!"); + + // Check if any of the v_cmp source operands is written by the saveexec. 
+ MachineOperand *Src0 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src0); + if (Src0->isReg() && TRI->isSGPRReg(*MRI, Src0->getReg()) && + MI.modifiesRegister(Src0->getReg(), TRI)) + return; + + MachineOperand *Src1 = TII->getNamedOperand(*VCmp, AMDGPU::OpName::src1); + if (Src1->isReg() && TRI->isSGPRReg(*MRI, Src1->getReg()) && + MI.modifiesRegister(Src1->getReg(), TRI)) + return; + + // Don't do the transformation if the destination operand is included in + // it's MBB Live-outs, meaning it's used in any of it's successors, leading + // to incorrect code if the v_cmp and therefore the def of + // the dest operand is removed. + if (isLiveOut(*VCmp->getParent(), VCmpDest->getReg())) + return; + + // If the v_cmp target is in use between v_cmp and s_and_saveexec or after the + // s_and_saveexec, skip the optimization. + if (isRegisterInUseBetween(*VCmp, MI, VCmpDest->getReg(), false, true) || + isRegisterInUseAfter(MI, VCmpDest->getReg())) + return; + + // Try to determine if there is a write to any of the VCmp + // operands between the saveexec and the vcmp. + // If yes, additional VGPR spilling might need to be inserted. In this case, + // it's not worth replacing the instruction sequence. + SmallVector<MCRegister, 2> NonDefRegs; + if (Src0->isReg()) + NonDefRegs.push_back(Src0->getReg()); + + if (Src1->isReg()) + NonDefRegs.push_back(Src1->getReg()); + + if (!findInstrBackwards( + MI, [&](MachineInstr *Check) { return Check == VCmp; }, NonDefRegs)) + return; + + if (VCmp) + SaveExecVCmpMapping[&MI] = VCmp; +} + +// Record occurences of +// s_or_saveexec s_o, s_i +// s_xor exec, exec, s_o +// to be replaced with +// s_andn2_saveexec s_o, s_i. +void SIOptimizeExecMasking::tryRecordOrSaveexecXorSequence(MachineInstr &MI) { + const unsigned XorOpcode = + ST->isWave32() ? 
AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64; + + if (MI.getOpcode() == XorOpcode && &MI != &MI.getParent()->front()) { + const MachineOperand &XorDst = MI.getOperand(0); + const MachineOperand &XorSrc0 = MI.getOperand(1); + const MachineOperand &XorSrc1 = MI.getOperand(2); + + if (XorDst.isReg() && XorDst.getReg() == Exec && XorSrc0.isReg() && + XorSrc1.isReg() && + (XorSrc0.getReg() == Exec || XorSrc1.getReg() == Exec)) { + const unsigned OrSaveexecOpcode = ST->isWave32() + ? AMDGPU::S_OR_SAVEEXEC_B32 + : AMDGPU::S_OR_SAVEEXEC_B64; + + // Peek at the previous instruction and check if this is a relevant + // s_or_saveexec instruction. + MachineInstr &PossibleOrSaveexec = *MI.getPrevNode(); + if (PossibleOrSaveexec.getOpcode() != OrSaveexecOpcode) + return; + + const MachineOperand &OrDst = PossibleOrSaveexec.getOperand(0); + const MachineOperand &OrSrc0 = PossibleOrSaveexec.getOperand(1); + if (OrDst.isReg() && OrSrc0.isReg()) { + if ((XorSrc0.getReg() == Exec && XorSrc1.getReg() == OrDst.getReg()) || + (XorSrc0.getReg() == OrDst.getReg() && XorSrc1.getReg() == Exec)) { + OrXors.emplace_back(&PossibleOrSaveexec, &MI); + } + } } } +} - for (const auto &Entry : SaveExecVCmpMapping) { - MachineInstr *SaveExecInstr = Entry.getFirst(); - MachineInstr *VCmpInstr = Entry.getSecond(); +bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() { + if (OrXors.empty()) { + return false; + } - if (optimizeSingleVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec)) { - SaveExecInstr->eraseFromParent(); - VCmpInstr->eraseFromParent(); + bool Changed = false; + const unsigned Andn2Opcode = ST->isWave32() ? 
AMDGPU::S_ANDN2_SAVEEXEC_B32 + : AMDGPU::S_ANDN2_SAVEEXEC_B64; - Changed = true; - } + for (const auto &Pair : OrXors) { + MachineInstr *Or = nullptr; + MachineInstr *Xor = nullptr; + std::tie(Or, Xor) = Pair; + BuildMI(*Or->getParent(), Or->getIterator(), Or->getDebugLoc(), + TII->get(Andn2Opcode), Or->getOperand(0).getReg()) + .addReg(Or->getOperand(1).getReg()); + + Or->eraseFromParent(); + Xor->eraseFromParent(); + + Changed = true; } return Changed; @@ -736,9 +774,42 @@ bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { TRI = ST->getRegisterInfo(); TII = ST->getInstrInfo(); MRI = &MF.getRegInfo(); + Exec = TRI->getExec(); bool Changed = optimizeExecSequence(); - Changed |= optimizeVCmpxAndSaveexecSequence(); + + OrXors.clear(); + SaveExecVCmpMapping.clear(); + static unsigned SearchWindow = 10; + for (MachineBasicBlock &MBB : MF) { + unsigned SearchCount = 0; + + for (auto &MI : llvm::reverse(MBB)) { + if (MI.isDebugInstr()) + continue; + + if (SearchCount >= SearchWindow) { + break; + } + + tryRecordOrSaveexecXorSequence(MI); + tryRecordVCmpxAndSaveexecSequence(MI); + + if (MI.modifiesRegister(Exec, TRI)) { + break; + } + + ++SearchCount; + } + } + + Changed |= optimizeOrSaveexecXorSequences(); + for (const auto &Entry : SaveExecVCmpMapping) { + MachineInstr *SaveExecInstr = Entry.getFirst(); + MachineInstr *VCmpInstr = Entry.getSecond(); + + Changed |= optimizeVCMPSaveExecSequence(*SaveExecInstr, *VCmpInstr, Exec); + } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 57dbad468de8..aed84437b890 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -184,6 +184,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { if (isDefBetween(*TRI, LIS, CCReg, *Sel, 
*And)) return false; + // Cannot safely mirror live intervals with PHI nodes, so check for these + // before optimization. + SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); + LiveInterval *SelLI = &LIS->getInterval(SelReg); + if (llvm::any_of(SelLI->vnis(), + [](const VNInfo *VNI) { + return VNI->isPHIDef(); + })) + return false; + // TODO: Guard against implicit def operands? LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' << *And); @@ -204,31 +214,34 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n'); - SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); - SlotIndex SelIdx = LIS->getInstructionIndex(*Sel); - - LiveInterval *CmpLI = - CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr; - LiveInterval *SelLI = - SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr; - // Update live intervals for CCReg before potentially removing CmpReg/SelReg, // and their associated liveness information. + SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp); if (CCReg.isVirtual()) { - // Note: this ignores that SelLI might have multiple internal values - // or splits and simply extends the live range to cover all cases - // where the result of the v_cndmask_b32 was live (e.g. loops). - // This could yield worse register allocation in rare edge cases. - SlotIndex EndIdx = AndIdx.getRegSlot(); - if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock()) - EndIdx = SelLI->endIndex(); + // Apply live ranges from SelLI to CCReg potentially matching splits + // and extending to loop boundaries. 
+ + auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) { + // Copy live ranges from SelLI, adjusting start and end as required + auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot()); + assert(DefSegment != SelLI->end() && + "No live interval segment covering definition?"); + for (auto I = DefSegment; I != SelLI->end(); ++I) { + SlotIndex Start = I->start < SelIdx.getRegSlot() ? + SelIdx.getRegSlot() : I->start; + SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ? + I->end : AndIdx.getRegSlot(); + Dst.addSegment(LiveRange::Segment(Start, End, VNI)); + } + // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend. + if (!SelLI->getSegmentContaining(AndIdx.getRegSlot())) + Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI)); + }; LiveInterval &CCLI = LIS->getInterval(CCReg); auto CCQ = CCLI.Query(SelIdx.getRegSlot()); - if (CCQ.valueIn()) { - CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(), - EndIdx, CCQ.valueIn())); - } + if (CCQ.valueIn()) + applyLiveRanges(CCLI, CCQ.valueIn()); if (CC->getSubReg()) { LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg()); @@ -237,10 +250,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { Allocator, Mask, [=](LiveInterval::SubRange &SR) { auto CCQS = SR.Query(SelIdx.getRegSlot()); - if (CCQS.valueIn()) { - SR.addSegment(LiveRange::Segment( - SelIdx.getRegSlot(), EndIdx, CCQS.valueIn())); - } + if (CCQS.valueIn()) + applyLiveRanges(SR, CCQS.valueIn()); }, *LIS->getSlotIndexes(), *TRI); CCLI.removeEmptySubRanges(); @@ -253,7 +264,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { // Try to remove compare. Cmp value should not used in between of cmp // and s_and_b64 if VCC or just unused if any other register. - if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || + LiveInterval *CmpLI = CmpReg.isVirtual() ? 
&LIS->getInterval(CmpReg) : nullptr; + if ((CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) || (CmpReg == Register(CondReg) && std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(), [&](const MachineInstr &MI) { @@ -266,18 +278,16 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { Cmp->eraseFromParent(); // Try to remove v_cndmask_b32. - if (SelLI) { - // Kill status must be checked before shrinking the live range. - bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); - LIS->shrinkToUses(SelLI); - bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); - if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { - LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); - - LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); - LIS->RemoveMachineInstrFromMaps(*Sel); - Sel->eraseFromParent(); - } + // Kill status must be checked before shrinking the live range. + bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill(); + LIS->shrinkToUses(SelLI); + bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef(); + if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) { + LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n'); + + LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot()); + LIS->RemoveMachineInstrFromMaps(*Sel); + Sel->eraseFromParent(); } } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h index b13afceba20e..553fb4cf496c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.h @@ -49,6 +49,8 @@ struct SIProgramInfo { uint32_t AccumOffset = 0; uint32_t TgSplit = 0; uint32_t NumSGPR = 0; + unsigned SGPRSpill = 0; + unsigned VGPRSpill = 0; uint32_t LDSSize = 0; bool FlatUsed = false; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td index 882d13402a19..b7e8eadfe71d 
100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td @@ -119,13 +119,19 @@ class SM_Probe_Pseudo <string opName, string variant, RegisterClass baseClass, let PseudoInstr = opName # variant; } -class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> - : SM_Pseudo<opName, outs, ins, asmOps, pattern> { - RegisterClass BaseClass; +class SM_Load_Pseudo <string opName, RegisterClass baseClass, + RegisterClass dstClass, OffsetMode offsets> + : SM_Pseudo<opName, (outs dstClass:$sdst), + !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)), + " $sdst, $sbase, " # offsets.Asm # "$cpol", []> { + RegisterClass BaseClass = baseClass; let mayLoad = 1; let mayStore = 0; let has_glc = 1; let has_dlc = 1; + let has_offset = offsets.HasOffset; + let has_soffset = offsets.HasSOffset; + let PseudoInstr = opName # offsets.Variant; } class SM_Store_Pseudo <string opName, RegisterClass baseClass, @@ -158,40 +164,9 @@ class SM_Discard_Pseudo <string opName, string variant, dag offsets, multiclass SM_Pseudo_Loads<string opName, RegisterClass baseClass, RegisterClass dstClass> { - def _IMM : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, i32imm:$offset, CPol:$cpol), - " $sdst, $sbase, $offset$cpol", []> { - let has_offset = 1; - let BaseClass = baseClass; - let PseudoInstr = opName # "_IMM"; - let has_glc = 1; - let has_dlc = 1; - } - - def _SGPR : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soffset, CPol:$cpol), - " $sdst, $sbase, $soffset$cpol", []> { - let has_soffset = 1; - let BaseClass = baseClass; - let PseudoInstr = opName # "_SGPR"; - let has_glc = 1; - let has_dlc = 1; - } - - def _SGPR_IMM : SM_Load_Pseudo <opName, - (outs dstClass:$sdst), - (ins baseClass:$sbase, SReg_32:$soffset, - i32imm:$offset, CPol:$cpol), - " $sdst, $sbase, $soffset$offset$cpol", []> { - let has_offset = 1; - 
let has_soffset = 1; - let BaseClass = baseClass; - let PseudoInstr = opName # "_SGPR_IMM"; - let has_glc = 1; - let has_dlc = 1; - } + def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>; + def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>; + def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>; } multiclass SM_Pseudo_Stores<string opName, @@ -596,10 +571,10 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps> soffset{6-0}, ?); } -class SMEM_Real_Load_vi<bits<8> op, string ps, dag offsets> - : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)> { - RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass; - let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets> + : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); } // The alternative GFX9 SGPR encoding using soffset to encode the @@ -614,14 +589,12 @@ class SMEM_Real_SGPR_alt_gfx9 { } multiclass SM_Real_Loads_vi<bits<8> op, string ps> { - def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM", (ins smem_offset:$offset)>; - def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR", (ins SReg_32:$soffset)>; - def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR", - (ins SReg_32:$soffset)>, + def _IMM_vi : SMEM_Real_Load_vi <op, ps, IMM_Offset>; + def _SGPR_vi : SMEM_Real_Load_vi <op, ps, SGPR_Offset>; + def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_Offset>, SMEM_Real_SGPR_alt_gfx9; let IsGFX9SpecificEncoding = true in - def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi < - op, ps#"_SGPR_IMM", (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_IMM_Offset>; } class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> { @@ -883,6 +856,7 @@ def smrd_load : 
PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformL def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">; def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">; def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">; +def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">; def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">; def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">; @@ -903,11 +877,18 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> { // 3. SGPR offset def : GCNPat < - (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) + (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)), + (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0)) >; - // 4. No offset + // 4. SGPR+IMM offset + def : GCNPat < + (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)), + (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> { + let OtherPredicates = [isGFX9Plus]; + } + + // 5. 
No offset def : GCNPat < (vt (smrd_load (i64 SReg_64:$sbase))), (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0)) @@ -1021,19 +1002,16 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); } -multiclass SM_Real_Loads_gfx10<bits<8> op, string ps, - SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM), - SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> { - def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> { - let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); - } - def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); - } - def _SGPR_IMM_gfx10 : SMEM_Real_gfx10<op, !cast<SM_Load_Pseudo>(ps#_SGPR_IMM)> { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, - smem_offset_mod:$offset, CPol:$cpol); - } +class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets> + : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); +} + +multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> { + def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>; + def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>; + def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>; } class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> { @@ -1227,17 +1205,16 @@ class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> : let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); } -class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, dag offsets> : - SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName> { - RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps).BaseClass; - let InOperandList = !con((ins BaseClass:$sbase), offsets, (ins CPol:$cpol)); +class 
SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> : + SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> { + RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; + let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); } multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> { - def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName, (ins smem_offset:$offset)>; - def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName, (ins SReg_32:$soffset)>; - def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11< - op, ps#"_SGPR_IMM", opName, (ins SReg_32:$soffset, smem_offset_mod:$offset)>; + def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>; + def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>; + def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>; def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>, Requires<[isGFX11Plus]>; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2f334e211181..b5fb390c08e1 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -417,9 +417,9 @@ bool getMAIIsGFX940XDL(unsigned Opc) { CanBeVOPD getCanBeVOPD(unsigned Opc) { const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); if (Info) - return {Info->CanBeVOPDX, 1}; + return {Info->CanBeVOPDX, true}; else - return {0, 0}; + return {false, false}; } unsigned getVOPDOpcode(unsigned Opc) { diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h index 65ed02ca62de..a2d59abd3abb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h +++ 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h @@ -30,7 +30,7 @@ namespace AMDGPU { Align getAlign(DataLayout const &DL, const GlobalVariable *GV); std::vector<GlobalVariable *> findVariablesToLower(Module &M, - const Function *F = nullptr); + const Function *F); /// Replace all uses of constant \p C with instructions in \p F. void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1d374a9f90ba..73e4eb8cdc24 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -499,6 +499,59 @@ let SubtargetPredicate = isGFX9Only in { defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; } // End SubtargetPredicate = isGFX9Only +class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { + let HasExtSDWA = 1; + let HasExtSDWA9 = 1; + let HasExt = 1; + let DstRCSDWA = getVALUDstForVT<vt>.ret; + let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0, + clampmod:$clamp, omod:$omod, src0_sel:$src0_sel); + let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel + let AsmSDWA9 = AsmSDWA; + let EmitDstSel = 0; +} + +def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>; +def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>; + +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = [WriteFloatCvt] in { + defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>; + defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>; + defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>; + defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>; +} + +class Cvt_F32_F8_Pat<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, 
VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< + (f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, index), + (inst_e32 $src)) +>; + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, + V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>; + def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, + V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>; +} + +class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, + VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< + (v2f32 (node i32:$src, index)), + !if (index, + (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1), + (inst_e32 $src)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, + V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; + def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, + V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; +} + let SubtargetPredicate = isGFX10Plus in { defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>; @@ -1106,11 +1159,36 @@ multiclass VOP1_Real_gfx9 <bits<10> op> { } +multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> { + let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { + defm NAME : VOP1_Real_e32e64_vi <op>; + } + + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_gfx9 : + VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, + VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { + let Inst{42-40} = 6; + } + + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_gfx9 : + VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, + VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; +} + defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; +defm V_CVT_F32_BF8 : 
VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; +defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; +defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; +} + //===----------------------------------------------------------------------===// // GFX10 //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td index dddd0aacc140..a911483cade5 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -481,6 +481,30 @@ def shl_0_to_4 : PatFrag< }]; } +def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + VGPR_32:$vdst_in, op_sel0:$op_sel); + let HasClamp = 0; + let HasExtVOP3DPP = 0; +} + +def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, + VOP3_OPSEL> { + let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, + FP32InputMods:$src1_modifiers, Src1RC64:$src1, + FP32InputMods:$src2_modifiers, VGPR_32:$src2, + op_sel0:$op_sel); + let HasClamp = 0; + let HasSrc2 = 0; + let HasSrc2Mods = 1; + let AsmVOP3OpSel = !subst(", $src2_modifiers", "", + getAsmVOP3OpSel<3, HasClamp, + HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods>.ret); + let HasExtVOP3DPP = 0; +} + let SubtargetPredicate = isGFX9Plus in { let isCommutable = 1, isReMaterializable = 1 in { defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -526,6 +550,43 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32 let SubtargetPredicate = isGFX940Plus in defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>; +let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, + SchedRW = 
[WriteFloatCvt] in { + let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { + defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; + defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; + } + + // These instructions have non-standard use of op_sel. In particular they are + // using op_sel bits 2 and 3 while only having two sources. Therefore dummy + // src2 is used to hold the op_sel value. + let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in { + defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; + defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; + } +} + +class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, f32:$src1, i32:$old, index)), + (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, !if(index, SRCMODS.OP_SEL_0, 0)) +>; + +class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat< + (i32 (node f32:$src0, i32:$src1, i32:$old, index)), + (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, + !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, !if(index{1}, SRCMODS.OP_SEL_0, 0)) +>; + +foreach Index = [0, -1] in { + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>; + def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>; +} + +foreach Index = [0, 1, 2, 3] in { + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>; + def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>; +} + class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. 
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2), @@ -699,15 +760,19 @@ def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>; } class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile<P, Features> { - // FIXME VOP3 DPP versions are unsupported - let HasExtVOP3DPP = 0; let HasClamp = 0; let HasOMod = 0; - let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, - NumSrcArgs, HasClamp, HasOMod, - !if(isFloatType<Src0VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src1VT>.ret, FPVRegInputMods, IntOpSelMods), - !if(isFloatType<Src2VT>.ret, FPVRegInputMods, IntOpSelMods)>.ret; + // Override modifiers for bf16(i16) (same as float modifiers). + let HasSrc0Mods = 1; + let HasSrc1Mods = 1; + let HasSrc2Mods = 1; + let Src0ModDPP = FPVRegInputMods; + let Src1ModDPP = FPVRegInputMods; + let Src2ModVOP3DPP = FPVRegInputMods; + let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, + HasClamp, HasOMod, FPVRegInputMods, + FPVRegInputMods, FPVRegInputMods>.ret; + let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret; } let SubtargetPredicate = isGFX11Plus in { @@ -723,7 +788,7 @@ let SubtargetPredicate = isGFX11Plus in { defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>; } // End SubtargetPredicate = isGFX11Plus -let SubtargetPredicate = HasDot8Insts in { +let SubtargetPredicate = HasDot8Insts, IsDOT=1 in { defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>; defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_I16_V2I16_V2I16_I16>, int_amdgcn_fdot2_bf16_bf16>; } @@ -848,9 +913,8 @@ defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11<0x264>; defm V_MINMAX_I32 : VOP3_Realtriple_gfx11<0x265>; -// FIXME VOP3 DPP Dot instructions are unsupported -defm V_DOT2_F16_F16 : 
VOP3_Real_Base_gfx11<0x266>; -defm V_DOT2_BF16_BF16 : VOP3_Real_Base_gfx11<0x267>; +defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11<0x266>; +defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11<0x267>; defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">; defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">; defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">; @@ -1161,6 +1225,13 @@ multiclass VOP3OpSel_Real_gfx9<bits<10> op> { VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>; } +multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> { + def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> { + let Inst{13} = src2_modifiers{2}; // op_sel(2) + } +} + multiclass VOP3Interp_Real_vi<bits<10> op> { def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>, VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>; @@ -1352,3 +1423,10 @@ defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>; defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>; defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>; + +let OtherPredicates = [HasFP8Insts] in { +defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>; +defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>; +defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>; +defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>; +} diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 59ce532af59b..f1ce613d613b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -493,6 +493,8 @@ def VOPProfileMAI_I32_I64_X16 : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, A def VOPProfileMAI_I32_I64_X32 : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, AISrc_512_b32, ADst_512, 
AVSrc_64>; def VOPProfileMAI_F32_V2F32_X16 : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X32 : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>; +def VOPProfileMAI_F32_I64_X32 : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>; +def VOPProfileMAI_F32_I64_X16 : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>; def VOPProfileMAI_F32_F32_X4_VCD : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, VISrc_128_f32, VDst_128>; def VOPProfileMAI_F32_F32_X16_VCD : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, VISrc_512_f32, VDst_512>; @@ -515,6 +517,8 @@ def VOPProfileMAI_I32_I64_X16_VCD : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, def VOPProfileMAI_I32_I64_X32_VCD : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, VISrc_512_b32, VDst_512, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X16_VCD : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>; def VOPProfileMAI_F32_V2F32_X32_VCD : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>; +def VOPProfileMAI_F32_I64_X32_VCD : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>; +def VOPProfileMAI_F32_I64_X16_VCD : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>; def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>; @@ -522,6 +526,8 @@ def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_I32_16X16X64_I8 : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>; def VOPProfileSMFMAC_I32_32X32X32_I8 : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, 
AVSrc_64, AVSrc_128>; +def VOPProfileSMFMAC_F32_16X16X64_F8 : VOPProfileSMFMAC<VOP_V4F32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>; +def VOPProfileSMFMAC_F32_32X32X32_F8 : VOPProfileSMFMAC<VOP_V16F32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>; class MFMATable <bit is_mac, string Name> { bit IsMac = is_mac; @@ -638,6 +644,14 @@ let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in { defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>; defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>; defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>; + defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>; + defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>; + defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>; + defm V_MFMA_F32_16X16X32_FP8_FP8 : MAIInst<"v_mfma_f32_16x16x32_fp8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_fp8>; + defm V_MFMA_F32_32X32X16_BF8_BF8 : MAIInst<"v_mfma_f32_32x32x16_bf8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_bf8>; + defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>; + defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>; + defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>; } // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1 multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> { @@ -654,6 +668,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : 
SMFMACInst<"v_smfmac_f32_16x16x32_bf16", defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>; defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>; defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>; +defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>; +defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>; +defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>; +defm V_SMFMAC_F32_16X16X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>; +defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>; +defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>; +defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>; +defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>; } def MAIInstInfoTable : GenericTable { @@ -1121,6 +1143,14 @@ defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">; defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">; defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">; +defm V_MFMA_F32_16X16X32_BF8_BF8 : 
VOP3P_Real_MFMA_gfx940 <0x70>; +defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>; +defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>; +defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>; +defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>; +defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>; +defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>; +defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>; defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">; defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">; @@ -1137,6 +1167,14 @@ defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x1 defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">; defm V_SMFMAC_I32_16X16X64_I8 : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">; defm V_SMFMAC_I32_32X32X32_I8 : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">; +defm V_SMFMAC_F32_16X16X64_BF8_BF8 : VOP3P_Real_SMFMAC <0x78, "v_smfmac_f32_16x16x64bf8bf8">; +defm V_SMFMAC_F32_16X16X64_BF8_FP8 : VOP3P_Real_SMFMAC <0x79, "v_smfmac_f32_16x16x64bf8fp8">; +defm V_SMFMAC_F32_16X16X64_FP8_BF8 : VOP3P_Real_SMFMAC <0x7a, "v_smfmac_f32_16x16x64fp8bf8">; +defm V_SMFMAC_F32_16X16X64_FP8_FP8 : VOP3P_Real_SMFMAC <0x7b, "v_smfmac_f32_16x16x64fp8fp8">; +defm V_SMFMAC_F32_32X32X32_BF8_BF8 : VOP3P_Real_SMFMAC <0x7c, "v_smfmac_f32_32x32x32bf8bf8">; +defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC <0x7d, "v_smfmac_f32_32x32x32bf8fp8">; +defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC <0x7e, "v_smfmac_f32_32x32x32fp8bf8">; +defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC <0x7f, "v_smfmac_f32_32x32x32fp8fp8">; let SubtargetPredicate = HasPackedFP32Ops in { defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 33d3441e94c2..d489a089ac78 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -59,15 +59,17 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt "$src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"); let AsmDPP8 = "$src0, $src1 $dpp8$fi"; let AsmDPP16 = AsmDPP#"$fi"; + // VOPC DPP Instructions do not need an old operand + let TieRegDPP = ""; let InsDPP = getInsDPP<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP16 = getInsDPP16<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; let InsDPP8 = getInsDPP8<VOPDstOperand<Src0DPP>, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, - Src2ModDPP>.ret; + Src2ModDPP, 0/*HasOld*/>.ret; // The destination for 32-bit encoding is implicit. 
let HasDst32 = 0; @@ -76,9 +78,9 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; - let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; - let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; - let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs>.ret; + let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; + let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; + let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0VOP3DPP, NumSrcArgs, 0/*HasOld*/>.ret; list<SchedReadWrite> Schedule = sched; } @@ -293,7 +295,7 @@ multiclass VOPC_Pseudos <string opName, let Defs = !if(DefExec, [EXEC], []); let SchedRW = P.Schedule; let isCompare = 1; - let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $sdst", ""); + let Constraints = ""; } } // end SubtargetPredicate = isGFX11Plus @@ -711,7 +713,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> : VOPC_Profile<sched, vt, i32> { let AsmDPP = "$src0_modifiers, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let AsmDPP16 = AsmDPP#"$fi"; - let InsDPP = (ins VGPR_32:$old, FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let InsDPP = (ins FPVRegInputMods:$src0_modifiers, VGPR_32:$src0, VGPR_32:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins FI:$fi)); // DPP8 forbids modifiers and can inherit from VOPC_Profile @@ -793,7 +795,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec, def _e64_dpp : VOP3_DPP_Pseudo<opName, p> { let Defs = !if(DefExec, [EXEC], []); let SchedRW = p.Schedule; - let Constraints = !if(p.NumSrcArgs, p.TieRegDPP # " = $sdst", ""); + 
let Constraints = ""; } } // end SubtargetPredicate = isGFX11Plus } @@ -1068,7 +1070,6 @@ class VOPC_DPP16<bits<8> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ps.Constraints; - let AsmMatchConverter = "cvtVOPCNoDstDPP"; } class VOPC_DPP16_SIMC<bits<8> op, VOP_DPP_Pseudo ps, int subtarget, @@ -1084,7 +1085,6 @@ class VOPC_DPP8<bits<8> op, VOPC_Pseudo ps, string opName = ps.OpName> let Uses = ps.Uses; let OtherPredicates = ps.OtherPredicates; let Constraints = ""; - let AsmMatchConverter = "cvtVOPCNoDstDPP8"; } // VOPC64 @@ -1133,7 +1133,6 @@ class VOPC64_DPP16_NoDst<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP16<op, ps, opName> { let Inst{7-0} = ? ; - let AsmMatchConverter = "cvtVOPC64NoDstDPP"; } class VOPC64_DPP8_Base<bits<10> op, string OpName, VOPProfile P> @@ -1163,13 +1162,12 @@ class VOPC64_DPP8_Dst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8<op, ps, opName> { bits<8> sdst; let Inst{7-0} = sdst; - let Constraints = "$old = $sdst"; + let Constraints = ""; } class VOPC64_DPP8_NoDst<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOPC64_DPP8<op, ps, opName> { let Inst{7-0} = ? 
; - let AsmMatchConverter = "cvtVOPC64NoDstDPP8"; let Constraints = ""; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td index 187485ffa3ae..b65ca2d6b1b3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -269,6 +269,10 @@ class VOP3OpSel_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> { class VOP3OpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx10<op, p>; +class VOP3DotOpSel_gfx11<bits<10> op, VOPProfile p> : VOP3OpSel_gfx11<op, p>{ + let Inst{11} = ?; + let Inst{12} = ?; +} // NB: For V_INTERP* opcodes, src0 is encoded as src1 and vice versa class VOP3Interp_vi <bits<10> op, VOPProfile P> : VOP3e_vi <op, P> { @@ -1270,6 +1274,8 @@ multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_f class Base_VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, string opName = ps.OpName> : VOP3_DPP<op, opName, ps.Pfl, 1> { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1285,6 +1291,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget, class Base_VOP3_DPP8<bits<10> op, VOP_Pseudo ps, string opName = ps.OpName> : VOP3_DPP8<op, opName, ps.Pfl> { + let VOP3_OPSEL = ps.Pfl.HasOpSel; + let IsDOT = ps.IsDOT; let hasSideEffects = ps.hasSideEffects; let Defs = ps.Defs; let SchedRW = ps.SchedRW; @@ -1326,6 +1334,15 @@ let AssemblerPredicate = isGFX11Only, VOP3e_gfx11<op, ps.Pfl>; } } + multiclass VOP3Dot_Real_Base_gfx11<bits<10> op, string opName = NAME, + bit isSingle = 0> { + defvar ps = !cast<VOP_Pseudo>(opName#"_e64"); + let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in { + def _e64_gfx11 : + VOP3_Real<ps, SIEncodingFamily.GFX11>, + VOP3DotOpSel_gfx11<op, ps.Pfl>; + } + } multiclass VOP3_Real_with_name_gfx11<bits<10> op, string opName, string asmName, 
bit isSingle = 0> { defvar ps = !cast<VOP_Pseudo>(opName#"_e64"); @@ -1355,6 +1372,15 @@ let AssemblerPredicate = isGFX11Only, let DecoderNamespace = "DPPGFX11"; } } + + multiclass VOP3Dot_Real_dpp_Base_gfx11<bits<10> op, string opName = NAME> { + def _e64_dpp_gfx11 : VOP3_DPP16<op, !cast<VOP_DPP_Pseudo>(opName#"_e64"#"_dpp"), SIEncodingFamily.GFX11> { + let Inst{11} = ?; + let Inst{12} = ?; + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP3_Real_dpp_with_name_gfx11<bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); @@ -1368,6 +1394,16 @@ let AssemblerPredicate = isGFX11Only, let DecoderNamespace = "DPP8GFX11"; } } + + multiclass VOP3Dot_Real_dpp8_Base_gfx11<bits<10> op, string opName = NAME> { + defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); + def _e64_dpp8_gfx11 : Base_VOP3_DPP8<op, ps> { + let Inst{11} = ?; + let Inst{12} = ?; + let DecoderNamespace = "DPP8GFX11"; + } + } + multiclass VOP3_Real_dpp8_with_name_gfx11<bits<10> op, string opName, string asmName> { defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); @@ -1406,6 +1442,12 @@ multiclass VOP3_Realtriple_gfx11<bits<10> op, VOP3_Real_dpp_Base_gfx11<op, opName>, VOP3_Real_dpp8_Base_gfx11<op, opName>; +multiclass VOP3Dot_Realtriple_gfx11<bits<10> op, + bit isSingle = 0, string opName = NAME> : + VOP3Dot_Real_Base_gfx11<op, opName, isSingle>, + VOP3Dot_Real_dpp_Base_gfx11<op, opName>, + VOP3Dot_Real_dpp8_Base_gfx11<op, opName>; + multiclass VOP3Only_Realtriple_gfx11<bits<10> op> : VOP3_Realtriple_gfx11<op, 1>; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 80ba7b5f0d2e..183febe756c1 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6726,8 +6726,8 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } 
-bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { // Try hard to rematerialize any VCTPs because if we spill P0, it will block // the tail predication conversion. This means that the element count // register has to be live for longer, but that has to be better than diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 3b8f3403e3c3..453e3fa1b99b 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -480,8 +480,7 @@ private: MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; private: /// Modeling special VFP / NEON fp MLA / MLS hazards. diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 613904f702f0..e5347ed8e53a 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1720,6 +1720,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned UxtOp, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); + bool IsThumb1Only = STI->isThumb1Only(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); @@ -1794,7 +1795,8 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); - unsigned CMPri = IsThumb ? 
ARM::t2CMPri : ARM::CMPri; + unsigned CMPri = + IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) .addReg(TempReg, RegState::Kill) .addImm(0) @@ -1848,6 +1850,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { bool IsThumb = STI->isThumb(); + assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!"); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); @@ -3044,6 +3047,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, assert(STI->isThumb()); return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, NextMBBI); + case ARM::tCMP_SWAP_32: + assert(STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI); case ARM::CMP_SWAP_8: assert(!STI->isThumb()); @@ -3054,11 +3060,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, NextMBBI); case ARM::CMP_SWAP_32: - if (STI->isThumb()) - return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, - NextMBBI); - else - return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); + assert(!STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); case ARM::CMP_SWAP_64: return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index e0e4ffd90e0e..afe16a3cd55c 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3131,7 +3131,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { // Else v8i16 pattern of an extract and an insert, with a optional vmovx for // extracting odd lanes. 
- if (VT == MVT::v8i16) { + if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) { SDValue Inp1 = CurDAG->getTargetExtractSubreg( ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); SDValue Inp2 = CurDAG->getTargetExtractSubreg( @@ -3151,7 +3151,7 @@ bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { // The inserted values are not extracted - if they are f16 then insert them // directly using a VINS. - if (VT == MVT::v8f16) { + if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) { SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); SDValue NewIns = CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, @@ -3512,7 +3512,7 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { else if (MemTy == MVT::i16) Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; else if (MemTy == MVT::i32) - Opcode = ARM::CMP_SWAP_32; + Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32; else llvm_unreachable("Unknown AtomicCmpSwap type"); diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp index e6be93e6480a..743cca9ff71f 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13572,6 +13572,10 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, bool ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + if (Level == BeforeLegalizeTypes) return true; @@ -13605,8 +13609,38 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, return false; } +bool ARMTargetLowering::isDesirableToCommuteXorWithShift( + const SDNode *N) const { + assert(N->getOpcode() == ISD::XOR && + (N->getOperand(0).getOpcode() == ISD::SHL || + 
N->getOperand(0).getOpcode() == ISD::SRL) && + "Expected XOR(SHIFT) pattern"); + + // Only commute if the entire NOT mask is a hidden shifted mask. + auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); + if (XorC && ShiftC) { + unsigned MaskIdx, MaskLen; + if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) { + unsigned ShiftAmt = ShiftC->getZExtValue(); + unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); + if (N->getOperand(0).getOpcode() == ISD::SHL) + return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt); + return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt); + } + } + + return false; +} + bool ARMTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { + assert(((N->getOpcode() == ISD::SHL && + N->getOperand(0).getOpcode() == ISD::SRL) || + (N->getOpcode() == ISD::SRL && + N->getOperand(0).getOpcode() == ISD::SHL)) && + "Expected shift-shift mask"); + if (!Subtarget->isThumb1Only()) return true; @@ -19962,6 +19996,14 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode( } break; } + case ARMISD::VBICIMM: { + SDValue Op0 = Op.getOperand(0); + unsigned ModImm = Op.getConstantOperandVal(1); + unsigned EltBits = 0; + uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits); + if ((OriginalDemandedBits & Mask) == 0) + return TLO.CombineTo(Op, Op0); + } } return TargetLowering::SimplifyDemandedBitsForTargetNode( diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h index 10f60ab93ae3..fae279ea7569 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h @@ -733,6 +733,8 @@ class VectorType; bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override; + bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; + 
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td index 15c33014e988..9c03f72fe6ae 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1882,6 +1882,7 @@ let Predicates = [HasMVEInt] in { def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane), (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>; // For i16's inserts being extracted from low lanes, then may use VINS. + let Predicates = [HasFullFP16] in { def : Pat<(ARMinsertelt (v8i16 MQPR:$src1), (ARMvgetlaneu (v8i16 MQPR:$src2), imm_even:$extlane), imm_odd:$inslane), @@ -1889,6 +1890,7 @@ let Predicates = [HasMVEInt] in { (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$inslane)), (EXTRACT_SUBREG MQPR:$src2, (SSubReg_f16_reg imm_even:$extlane))), (SSubReg_f16_reg imm_odd:$inslane)), MQPR)>; + } def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>; @@ -1905,17 +1907,21 @@ let Predicates = [HasMVEInt] in { def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_even:$lane), (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>; + let Predicates = [HasFullFP16] in { def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm_odd:$lane), (COPY_TO_REGCLASS (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), (VINSH (EXTRACT_SUBREG MQPR:$src1, (SSubReg_f16_reg imm_odd:$lane)), (COPY_TO_REGCLASS HPR:$src2, SPR)), (SSubReg_f16_reg imm_odd:$lane)), MQPR)>; + } def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane), (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>; + let Predicates = [HasFullFP16] in { def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane), (COPY_TO_REGCLASS (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), HPR)>; + 
} def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td index 71527ae1ab11..8f7039a327b3 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1782,11 +1782,15 @@ def tLDRConstPool let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), +def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), (ins GPR:$addr, tGPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), +def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), (ins GPR:$addr, tGPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; + +def tCMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, tGPR:$temp), + (ins GPR:$addr, GPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index ba1d806c8d81..3c102463ba08 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -33,6 +33,7 @@ #include "llvm/Transforms/InstCombine/InstCombiner.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <algorithm> #include <cassert> #include <cstdint> @@ 
-2197,12 +2198,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, return true; } -bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, - ScalarEvolution &SE, - AssumptionCache &AC, - TargetLibraryInfo *TLI, - DominatorTree *DT, - const LoopAccessInfo *LAI) { +bool ARMTTIImpl::preferPredicateOverEpilogue( + Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, + TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) { if (!EnableTailPredication) { LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); return false; @@ -2244,7 +2242,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, return false; } - return canTailPredicateLoop(L, LI, SE, DL, LAI); + return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI()); } PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const { diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index dcf82e703a7f..9c3980d79e60 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -288,12 +288,10 @@ public: AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo); - bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, - ScalarEvolution &SE, - AssumptionCache &AC, - TargetLibraryInfo *TLI, + bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, + AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, - const LoopAccessInfo *LAI); + LoopVectorizationLegality *LVL); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h index 2325193bac0a..3dd71243387b 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AVR/AVRSubtarget.h @@ -92,15 +92,15 @@ public: } /// Get I/O register addresses. - int getIORegRAMPZ(void) const { return hasELPM() ? 0x3b : -1; } - int getIORegEIND(void) const { return hasEIJMPCALL() ? 0x3c : -1; } - int getIORegSPL(void) const { return 0x3d; } - int getIORegSPH(void) const { return hasSmallStack() ? -1 : 0x3e; } - int getIORegSREG(void) const { return 0x3f; } + int getIORegRAMPZ() const { return hasELPM() ? 0x3b : -1; } + int getIORegEIND() const { return hasEIJMPCALL() ? 0x3c : -1; } + int getIORegSPL() const { return 0x3d; } + int getIORegSPH() const { return hasSmallStack() ? -1 : 0x3e; } + int getIORegSREG() const { return 0x3f; } /// Get GPR aliases. - int getRegTmpIndex(void) const { return hasTinyEncoding() ? 16 : 0; } - int getRegZeroIndex(void) const { return hasTinyEncoding() ? 17 : 1; } + int getRegTmpIndex() const { return hasTinyEncoding() ? 16 : 0; } + int getRegZeroIndex() const { return hasTinyEncoding() ? 17 : 1; } private: /// The ELF e_flags architecture. diff --git a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp index d490b385ac16..0bf739452fd2 100644 --- a/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -518,7 +518,7 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode = 0; if (CSKY::GPRRegClass.contains(DestReg, SrcReg)) - Opcode = CSKY::MOV32; + Opcode = STI.hasE2() ? 
CSKY::MOV32 : CSKY::MOV16; else if (v2sf && CSKY::sFPR32RegClass.contains(DestReg, SrcReg)) Opcode = CSKY::FMOV_S; else if (v3sf && CSKY::FPR32RegClass.contains(DestReg, SrcReg)) diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp index 3e09270a66d0..869433613620 100644 --- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -13,6 +13,7 @@ #include "DXILBitcodeWriter.h" #include "DXILValueEnumerator.h" #include "PointerTypeAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitcodeCommon.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -2580,10 +2581,9 @@ void DXILBitcodeWriter::writeFunctionLevelValueSymbolTable( SortedTable.push_back(VI.second->getValueName()); } // The keys are unique, so there shouldn't be stability issues. - std::sort(SortedTable.begin(), SortedTable.end(), - [](const ValueName *A, const ValueName *B) { - return A->first() < B->first(); - }); + llvm::sort(SortedTable, [](const ValueName *A, const ValueName *B) { + return A->first() < B->first(); + }); for (const ValueName *SI : SortedTable) { auto &Name = *SI; diff --git a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp index 08944ee3f1fe..e2a41515de38 100644 --- a/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp +++ b/contrib/llvm-project/llvm/lib/Target/DirectX/DXILWriter/DXILValueEnumerator.cpp @@ -809,7 +809,7 @@ void ValueEnumerator::organizeMetadata() { // - by function, then // - by isa<MDString> // and then sort by the original/current ID. Since the IDs are guaranteed to - // be unique, the result of std::sort will be deterministic. 
There's no need + // be unique, the result of llvm::sort will be deterministic. There's no need // for std::stable_sort. llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) { return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) < diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index abd84a188cfa..bd0232c71d48 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -85,7 +85,6 @@ public: int getAllocSizeOf(const Type *Ty) const; int getTypeAlignment(Type *Ty) const; - VectorType *getByteVectorTy(int ScLen) const; Constant *getNullValue(Type *Ty) const; Constant *getFullValue(Type *Ty) const; diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 4acf90bd9788..93c8864347bb 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -217,9 +217,8 @@ SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; - // FIXME: Only support PC-relative addressing to access the symbol. - // TODO: Add target flags. - if (!isPositionIndependent()) { + // TODO: Support dso_preemptable and target flags. 
+ if (GV->isDSOLocal()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); diff --git a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 468c4f43cb90..2d08d5c674bc 100644 --- a/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -38,9 +38,7 @@ static std::string computeDataLayout(const Triple &TT) { static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional<Reloc::Model> RM) { - if (!RM.hasValue()) - return Reloc::Static; - return *RM; + return RM.value_or(Reloc::Static); } LoongArchTargetMachine::LoongArchTargetMachine( diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp index b98be4ae4b75..4dfc16526a00 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1192,6 +1192,12 @@ bool MipsTargetLowering::hasBitTest(SDValue X, SDValue Y) const { bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { + assert(((N->getOpcode() == ISD::SHL && + N->getOperand(0).getOpcode() == ISD::SRL) || + (N->getOpcode() == ISD::SRL && + N->getOperand(0).getOpcode() == ISD::SHL)) && + "Expected shift-shift mask"); + if (N->getOperand(0).getValueType().isVector()) return false; return true; diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 9977d8ba0300..45e82e935772 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -73,8 +73,10 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/NativeFormatting.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -354,8 +356,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { // PTX ABI requires all scalar return values to be at least 32 // bits in size. fp16 normally uses .b16 as its storage type in // PTX, so its size must be adjusted here, too. - if (size < 32) - size = 32; + size = promoteScalarArgumentSize(size); O << ".param .b" << size << " func_retval0"; } else if (isa<PointerType>(Ty)) { @@ -384,8 +385,8 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; + if (elemtype.isInteger()) + sz = promoteScalarArgumentSize(sz); O << ".reg .b" << sz << " func_retval" << idx; if (j < je - 1) O << ", "; @@ -1168,31 +1169,37 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->hasInitializer()) { const Constant *Initializer = GVar->getInitializer(); if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { - AggBuffer aggBuffer(ElementSize, O, *this); + AggBuffer aggBuffer(ElementSize, *this); bufferAggregateConstant(Initializer, &aggBuffer); - if (aggBuffer.numSymbols) { - if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { - O << " .u64 "; + if (aggBuffer.numSymbols()) { + unsigned int ptrSize = MAI->getCodePointerSize(); + if (ElementSize % ptrSize || + !aggBuffer.allSymbolsAligned(ptrSize)) { + // Print in bytes and use the mask() operator for pointers. 
+ if (!STI.hasMaskOperator()) + report_fatal_error( + "initialized packed aggregate with pointers '" + + GVar->getName() + + "' requires at least PTX ISA version 7.1"); + O << " .u8 "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize / 8; + O << "[" << ElementSize << "] = {"; + aggBuffer.printBytes(O); + O << "}"; } else { - O << " .u32 "; + O << " .u" << ptrSize * 8 << " "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize / 4; + O << "[" << ElementSize / ptrSize << "] = {"; + aggBuffer.printWords(O); + O << "}"; } - O << "]"; } else { O << " .b8 "; getSymbol(GVar)->print(O, MAI); - O << "["; - O << ElementSize; - O << "]"; + O << "[" << ElementSize << "] = {"; + aggBuffer.printBytes(O); + O << "}"; } - O << " = {"; - aggBuffer.print(); - O << "}"; } else { O << " .b8 "; getSymbol(GVar)->print(O, MAI); @@ -1219,6 +1226,80 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << ";\n"; } +void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) { + const Value *v = Symbols[nSym]; + const Value *v0 = SymbolsBeforeStripping[nSym]; + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + MCSymbol *Name = AP.getSymbol(GVar); + PointerType *PTy = dyn_cast<PointerType>(v0->getType()); + // Is v0 a generic pointer? 
+ bool isGenericPointer = PTy && PTy->getAddressSpace() == 0; + if (EmitGeneric && isGenericPointer && !isa<Function>(v)) { + os << "generic("; + Name->print(os, AP.MAI); + os << ")"; + } else { + Name->print(os, AP.MAI); + } + } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { + const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false); + AP.printMCExpr(*Expr, os); + } else + llvm_unreachable("symbol type unknown"); +} + +void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) { + unsigned int ptrSize = AP.MAI->getCodePointerSize(); + symbolPosInBuffer.push_back(size); + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + for (unsigned int pos = 0; pos < size;) { + if (pos) + os << ", "; + if (pos != nextSymbolPos) { + os << (unsigned int)buffer[pos]; + ++pos; + continue; + } + // Generate a per-byte mask() operator for the symbol, which looks like: + // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...}; + // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers + std::string symText; + llvm::raw_string_ostream oss(symText); + printSymbol(nSym, oss); + for (unsigned i = 0; i < ptrSize; ++i) { + if (i) + os << ", "; + llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper); + os << "(" << symText << ")"; + } + pos += ptrSize; + nextSymbolPos = symbolPosInBuffer[++nSym]; + assert(nextSymbolPos >= pos); + } +} + +void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) { + unsigned int ptrSize = AP.MAI->getCodePointerSize(); + symbolPosInBuffer.push_back(size); + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + assert(nextSymbolPos % ptrSize == 0); + for (unsigned int pos = 0; pos < size; pos += ptrSize) { + if (pos) + os << ", "; + if (pos == nextSymbolPos) { + printSymbol(nSym, os); + nextSymbolPos = symbolPosInBuffer[++nSym]; + assert(nextSymbolPos % ptrSize == 0); + assert(nextSymbolPos >= pos + ptrSize); + 
} else if (ptrSize == 4) + os << support::endian::read32le(&buffer[pos]); + else + os << support::endian::read64le(&buffer[pos]); + } +} + void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { if (localDecls.find(f) == localDecls.end()) return; @@ -1494,8 +1575,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; + sz = promoteScalarArgumentSize(sz); } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else if (Ty->isHalfTy()) @@ -1559,8 +1639,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) - sz = 32; + if (elemtype.isInteger()) + sz = promoteScalarArgumentSize(sz); O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); if (j < je - 1) diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index cd61e99a103a..710c089e3325 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -61,24 +61,30 @@ class MCOperand; class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { class AggBuffer { - // Used to buffer the emitted string for initializing global - // aggregates. + // Used to buffer the emitted string for initializing global aggregates. // - // Normally an aggregate (array, vector or structure) is emitted - // as a u8[]. However, if one element/field of the aggregate - // is a non-NULL address, then the aggregate is emitted as u32[] - // or u64[]. + // Normally an aggregate (array, vector, or structure) is emitted as a u8[]. 
+ // However, if any element/field of the aggregate is a non-NULL address, + // and all such addresses are properly aligned, then the aggregate is + // emitted as u32[] or u64[]. In the case of unaligned addresses, the + // aggregate is emitted as u8[], and the mask() operator is used for all + // pointers. // - // We first layout the aggregate in 'buffer' in bytes, except for - // those symbol addresses. For the i-th symbol address in the - //aggregate, its corresponding 4-byte or 8-byte elements in 'buffer' - // are filled with 0s. symbolPosInBuffer[i-1] records its position - // in 'buffer', and Symbols[i-1] records the Value*. + // We first layout the aggregate in 'buffer' in bytes, except for those + // symbol addresses. For the i-th symbol address in the aggregate, its + // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s. + // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1] + // records the Value*. // - // Once we have this AggBuffer setup, we can choose how to print - // it out. + // Once we have this AggBuffer setup, we can choose how to print it out. public: - unsigned numSymbols; // number of symbol addresses + // number of symbol addresses + unsigned numSymbols() const { return Symbols.size(); } + + bool allSymbolsAligned(unsigned ptrSize) const { + return llvm::all_of(symbolPosInBuffer, + [=](unsigned pos) { return pos % ptrSize == 0; }); + } private: const unsigned size; // size of the buffer in bytes @@ -94,15 +100,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // SymbolsBeforeStripping[i].
SmallVector<const Value *, 4> SymbolsBeforeStripping; unsigned curpos; - raw_ostream &O; NVPTXAsmPrinter &AP; bool EmitGeneric; public: - AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP) - : size(size), buffer(size), O(O), AP(AP) { + AggBuffer(unsigned size, NVPTXAsmPrinter &AP) + : size(size), buffer(size), AP(AP) { curpos = 0; - numSymbols = 0; EmitGeneric = AP.EmitGeneric; } @@ -135,63 +139,13 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { symbolPosInBuffer.push_back(curpos); Symbols.push_back(GVar); SymbolsBeforeStripping.push_back(GVarBeforeStripping); - numSymbols++; } - void print() { - if (numSymbols == 0) { - // print out in bytes - for (unsigned i = 0; i < size; i++) { - if (i) - O << ", "; - O << (unsigned int) buffer[i]; - } - } else { - // print out in 4-bytes or 8-bytes - unsigned int pos = 0; - unsigned int nSym = 0; - unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; - unsigned int nBytes = 4; - if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit()) - nBytes = 8; - for (pos = 0; pos < size; pos += nBytes) { - if (pos) - O << ", "; - if (pos == nextSymbolPos) { - const Value *v = Symbols[nSym]; - const Value *v0 = SymbolsBeforeStripping[nSym]; - if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { - MCSymbol *Name = AP.getSymbol(GVar); - PointerType *PTy = dyn_cast<PointerType>(v0->getType()); - bool IsNonGenericPointer = false; // Is v0 a non-generic pointer? 
- if (PTy && PTy->getAddressSpace() != 0) { - IsNonGenericPointer = true; - } - if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { - O << "generic("; - Name->print(O, AP.MAI); - O << ")"; - } else { - Name->print(O, AP.MAI); - } - } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { - const MCExpr *Expr = - AP.lowerConstantForGV(cast<Constant>(CExpr), false); - AP.printMCExpr(*Expr, O); - } else - llvm_unreachable("symbol type unknown"); - nSym++; - if (nSym >= numSymbols) - nextSymbolPos = size + 1; - else - nextSymbolPos = symbolPosInBuffer[nSym]; - } else if (nBytes == 4) - O << *(unsigned int *)(&buffer[pos]); - else - O << *(unsigned long long *)(&buffer[pos]); - } - } - } + void printBytes(raw_ostream &os); + void printWords(raw_ostream &os); + + private: + void printSymbol(unsigned nSym, raw_ostream &os); }; friend class AggBuffer; diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6ad016dfa0a7..8264032b765a 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -206,6 +206,40 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, } } +/// PromoteScalarIntegerPTX +/// Used to make sure the arguments/returns are suitable for passing +/// and promote them to a larger size if they're not. +/// +/// The promoted type is placed in \p PromotedVT if the function returns true.
+static bool PromoteScalarIntegerPTX(const EVT &VT, MVT *PromotedVT) { + if (VT.isScalarInteger()) { + switch (PowerOf2Ceil(VT.getFixedSizeInBits())) { + default: + llvm_unreachable( + "Promotion is not suitable for scalars of size larger than 64-bits"); + case 1: + *PromotedVT = MVT::i1; + break; + case 2: + case 4: + case 8: + *PromotedVT = MVT::i8; + break; + case 16: + *PromotedVT = MVT::i16; + break; + case 32: + *PromotedVT = MVT::i32; + break; + case 64: + *PromotedVT = MVT::i64; + break; + } + return EVT(*PromotedVT) != VT; + } + return false; +} + // Check whether we can merge loads/stores of some of the pieces of a // flattened function parameter or return value into a single vector // load/store. @@ -1291,8 +1325,7 @@ std::string NVPTXTargetLowering::getPrototype( // PTX ABI requires all scalar return values to be at least 32 // bits in size. fp16 normally uses .b16 as its storage type in // PTX, so its size must be adjusted here, too. - if (size < 32) - size = 32; + size = promoteScalarArgumentSize(size); O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { @@ -1343,8 +1376,7 @@ std::string NVPTXTargetLowering::getPrototype( unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; + sz = promoteScalarArgumentSize(sz); } else if (isa<PointerType>(Ty)) { sz = PtrVT.getSizeInBits(); } else if (Ty->isHalfTy()) @@ -1515,11 +1547,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, NeedAlign = true; } else { // declare .param .b<size> .param<n>; - if ((VT.isInteger() || VT.isFloatingPoint()) && TypeSize < 4) { + if (VT.isInteger() || VT.isFloatingPoint()) { // PTX ABI requires integral types to be at least 32 bits in // size. FP16 is loaded/stored using i16, so it's handled // here as well. 
- TypeSize = 4; + TypeSize = promoteScalarArgumentSize(TypeSize * 8) / 8; } SDValue DeclareScalarParamOps[] = { Chain, DAG.getConstant(ParamCount, dl, MVT::i32), @@ -1556,6 +1588,17 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDValue StVal = OutVals[OIdx]; + + MVT PromotedVT; + if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { + EltVT = EVT(PromotedVT); + } + if (PromoteScalarIntegerPTX(StVal.getValueType(), &PromotedVT)) { + llvm::ISD::NodeType Ext = + Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + StVal = DAG.getNode(Ext, dl, PromotedVT, StVal); + } + if (IsByVal) { auto PtrVT = getPointerTy(DL); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StVal, @@ -1638,9 +1681,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Plus, this behavior is consistent with nvcc's. if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() || (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) { - // Scalar needs to be at least 32bit wide - if (resultsz < 32) - resultsz = 32; + resultsz = promoteScalarArgumentSize(resultsz); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(resultsz, dl, MVT::i32), @@ -1778,6 +1819,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT TheLoadType = VTs[i]; EVT EltType = Ins[i].VT; Align EltAlign = commonAlignment(RetAlign, Offsets[i]); + MVT PromotedVT; + + if (PromoteScalarIntegerPTX(TheLoadType, &PromotedVT)) { + TheLoadType = EVT(PromotedVT); + EltType = EVT(PromotedVT); + needTruncate = true; + } + if (ExtendIntegerRetVal) { TheLoadType = MVT::i32; EltType = MVT::i32; @@ -2558,6 +2607,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // v2f16 was loaded as an i32. Now we must bitcast it back. 
else if (EltVT == MVT::v2f16) Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); + + // If a promoted integer type is used, truncate down to the original + MVT PromotedVT; + if (PromoteScalarIntegerPTX(EltVT, &PromotedVT)) { + Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + // Extend the element if necessary (e.g. an i8 is loaded // into an i16 register) if (Ins[InsIdx].VT.isInteger() && @@ -2627,11 +2683,26 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return Chain; const DataLayout &DL = DAG.getDataLayout(); + SmallVector<SDValue, 16> PromotedOutVals; SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + SDValue PromotedOutVal = OutVals[i]; + MVT PromotedVT; + if (PromoteScalarIntegerPTX(VTs[i], &PromotedVT)) { + VTs[i] = EVT(PromotedVT); + } + if (PromoteScalarIntegerPTX(PromotedOutVal.getValueType(), &PromotedVT)) { + llvm::ISD::NodeType Ext = + Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + PromotedOutVal = DAG.getNode(Ext, dl, PromotedVT, PromotedOutVal); + } + PromotedOutVals.push_back(PromotedOutVal); + } + auto VectorInfo = VectorizePTXValueVTs( VTs, Offsets, RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL) @@ -2652,12 +2723,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); } - SDValue RetVal = OutVals[i]; + SDValue OutVal = OutVals[i]; + SDValue RetVal = PromotedOutVals[i]; + if (ExtendIntegerRetVal) { RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, RetVal); - } else if (RetVal.getValueSizeInBits() < 16) { + } else if (OutVal.getValueSizeInBits() < 16) { // Use 16-bit registers for small load-stores as it's the // smallest general purpose register size supported by NVPTX. RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 9a249d3da3d5..cea3dce3f1c5 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -77,6 +77,7 @@ public: bool hasImageHandles() const; bool hasFP16Math() const { return SmVersion >= 53; } bool allowFP16Math() const; + bool hasMaskOperator() const { return PTXVersion >= 71; } unsigned int getSmVersion() const { return SmVersion; } std::string getTargetName() const { return TargetName; } diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h index bf1524194cfb..6fee57b4664e 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -59,6 +59,16 @@ bool isKernelFunction(const Function &); bool getAlign(const Function &, unsigned index, unsigned &); bool getAlign(const CallInst &, unsigned index, unsigned &); +// PTX ABI requires all scalar argument/return values to have +// bit-size as a power of two of at least 32 bits. 
+inline unsigned promoteScalarArgumentSize(unsigned size) { + if (size <= 32) + return 32; + else if (size <= 64) + return 64; + else + return size; +} } #endif diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4247cf557c2a..14c4fd3a9ffa 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5473,7 +5473,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::MUL: { SDValue Op1 = N->getOperand(1); - if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) + if (Op1.getOpcode() != ISD::Constant || + (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32)) break; // If the multiplier fits int16, we can handle it with mulli. @@ -5486,13 +5487,27 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). 
uint64_t ImmSh = Imm >> Shift; - if (isInt<16>(ImmSh)) { - uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + if (!isInt<16>(ImmSh)) + break; + + uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); + if (Op1.getValueType() == MVT::i64) { SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, N->getOperand(0), SDImm); - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), - getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(63 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return; + } else { + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32); + SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32, + N->getOperand(0), SDImm); + + SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl), + getI32Imm(0, dl), getI32Imm(31 - Shift, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } break; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 59486c323567..c85f57f04c7d 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1086,8 +1086,8 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, // For opcodes with the ReMaterializable flag set, this function is called to // verify the instruction is really rematable. 
-bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AliasAnalysis *AA) const { +bool PPCInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h index e22b0086bde8..980bb3107a8b 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -495,8 +495,7 @@ public: unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 4689c0638ca6..23703ac54d0e 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -568,7 +568,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV); // Make sure the base is able to expand. - if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE)) + if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart())) return MadeChange; assert(BasePtrSCEV->isAffine() && @@ -602,7 +602,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( // Make sure offset is able to expand. Only need to check one time as the // offsets are reused between different chains. 
if (!BaseElemIdx) - if (!isSafeToExpand(OffsetSCEV, *SE)) + if (!SCEVE.isSafeToExpand(OffsetSCEV)) return false; Value *OffsetValue = SCEVE.expandCodeFor( @@ -1018,14 +1018,13 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores( if (!BasePtrSCEV->isAffine()) return MadeChange; - if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE)) - return MadeChange; - - SmallPtrSet<Value *, 16> DeletedPtrs; - BasicBlock *Header = L->getHeader(); SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "loopprepare-formrewrite"); + if (!SCEVE.isSafeToExpand(BasePtrSCEV->getStart())) + return MadeChange; + + SmallPtrSet<Value *, 16> DeletedPtrs; // For some DS form load/store instructions, it can also be an update form, // if the stride is constant and is a multipler of 4. Use update form if diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h index 917837a307ad..e6140edc8403 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.h @@ -30,6 +30,9 @@ class MachineInstr; class MachineOperand; class PassRegistry; +FunctionPass *createRISCVCodeGenPreparePass(); +void initializeRISCVCodeGenPreparePass(PassRegistry &); + bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td index e783ef38b448..8a6f69c7f7ca 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCV.td @@ -19,6 +19,19 @@ def HasStdExtM : Predicate<"Subtarget->hasStdExtM()">, AssemblerPredicate<(all_of FeatureStdExtM), "'M' (Integer Multiplication and Division)">; +def FeatureStdExtZmmul + : SubtargetFeature<"zmmul", "HasStdExtZmmul", "true", + "'Zmmul' (Integer Multiplication)">; +def HasStdExtZmmul : 
Predicate<"Subtarget->hasStdExtZmmul()">, + AssemblerPredicate<(all_of FeatureStdExtZmmul), + "'Zmmul' (Integer Multiplication)">; + +def HasStdExtMOrZmmul + : Predicate<"Subtarget->hasStdExtM() || Subtarget->hasStdExtZmmul()">, + AssemblerPredicate<(any_of FeatureStdExtM, FeatureStdExtZmmul), + "'M' (Integer Multiplication and Division) or " + "'Zmmul' (Integer Multiplication)">; + def FeatureStdExtA : SubtargetFeature<"a", "HasStdExtA", "true", "'A' (Atomic Instructions)">; @@ -465,7 +478,8 @@ def TuneNoDefaultUnroll "Disable default unroll preference.">; def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", - "SiFive 7-Series processors">; + "SiFive 7-Series processors", + [TuneNoDefaultUnroll]>; //===----------------------------------------------------------------------===// // Named operands for CSR instructions. @@ -499,9 +513,9 @@ def : ProcessorModel<"rocket-rv32", RocketModel, []>; def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>; def : ProcessorModel<"sifive-7-rv32", SiFive7Model, [], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM, FeatureStdExtC]>; @@ -528,7 +542,7 @@ def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM, FeatureStdExtA, FeatureStdExtF, FeatureStdExtC], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -553,7 +567,7 @@ def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - [TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit, FeatureStdExtM, @@ -568,7 +582,7 @@ def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit, FeatureStdExtF, FeatureStdExtD, FeatureStdExtC], - 
[TuneSiFive7, TuneNoDefaultUnroll]>; + [TuneSiFive7]>; //===----------------------------------------------------------------------===// // Define the RISC-V target. diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp new file mode 100644 index 000000000000..b700a9ede39b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -0,0 +1,169 @@ +//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a RISCV specific version of CodeGenPrepare. +// It munges the code in the input function to better prepare it for +// SelectionDAG-based code generation. This works around limitations in it's +// basic-block-at-a-time approach. 
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-codegenprepare" +#define PASS_NAME "RISCV CodeGenPrepare" + +STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt"); + +namespace { + +class RISCVCodeGenPrepare : public FunctionPass { + const DataLayout *DL; + const RISCVSubtarget *ST; + +public: + static char ID; + + RISCVCodeGenPrepare() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { return PASS_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<TargetPassConfig>(); + } + +private: + bool optimizeZExt(ZExtInst *I); + bool optimizeAndExt(BinaryOperator *BO); +}; + +} // end anonymous namespace + +bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) { + if (!ST->is64Bit()) + return false; + + Value *Src = ZExt->getOperand(0); + + // We only care about ZExt from i32 to i64. + if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32)) + return false; + + // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we + // can determine that the sign bit of X is zero via a dominating condition. + // This often occurs with widened induction variables. 
+ if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src, + Constant::getNullValue(Src->getType()), ZExt, + *DL)) { + auto *SExt = new SExtInst(Src, ZExt->getType(), "", ZExt); + SExt->takeName(ZExt); + SExt->setDebugLoc(ZExt->getDebugLoc()); + + ZExt->replaceAllUsesWith(SExt); + ZExt->eraseFromParent(); + ++NumZExtToSExt; + return true; + } + + return false; +} + +// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, +// but bits 63:32 are zero. If we can prove that bit 31 of X is 0, we can fill +// the upper 32 bits with ones. A separate transform will turn (zext X) into +// (sext X) for the same condition. +bool RISCVCodeGenPrepare::optimizeAndExt(BinaryOperator *BO) { + if (!ST->is64Bit()) + return false; + + if (BO->getOpcode() != Instruction::And) + return false; + + if (!BO->getType()->isIntegerTy(64)) + return false; + + // Left hand side should be sext or zext. + Instruction *LHS = dyn_cast<Instruction>(BO->getOperand(0)); + if (!LHS || (!isa<SExtInst>(LHS) && !isa<ZExtInst>(LHS))) + return false; + + Value *LHSSrc = LHS->getOperand(0); + if (!LHSSrc->getType()->isIntegerTy(32)) + return false; + + // Right hand side should be a constant. + Value *RHS = BO->getOperand(1); + + auto *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + return false; + uint64_t C = CI->getZExtValue(); + + // Look for constants that fit in 32 bits but not simm12, and can be made + // into simm12 by sign extending bit 31. This will allow use of ANDI. + // TODO: Is worth making simm32? + if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) + return false; + + // If we can determine the sign bit of the input is 0, we can replace the + // And mask constant. + if (!isImpliedByDomCondition(ICmpInst::ICMP_SGE, LHSSrc, + Constant::getNullValue(LHSSrc->getType()), + LHS, *DL)) + return false; + + // Sign extend the constant and replace the And operand. 
+ C = SignExtend64<32>(C); + BO->setOperand(1, ConstantInt::get(LHS->getType(), C)); + + return true; +} + +bool RISCVCodeGenPrepare::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + auto &TPC = getAnalysis<TargetPassConfig>(); + auto &TM = TPC.getTM<RISCVTargetMachine>(); + ST = &TM.getSubtarget<RISCVSubtarget>(F); + + DL = &F.getParent()->getDataLayout(); + + bool MadeChange = false; + for (auto &BB : F) { + for (Instruction &I : llvm::make_early_inc_range(BB)) { + if (auto *ZExt = dyn_cast<ZExtInst>(&I)) + MadeChange |= optimizeZExt(ZExt); + else if (I.getOpcode() == Instruction::And) + MadeChange |= optimizeAndExt(cast<BinaryOperator>(&I)); + } + } + + return MadeChange; +} + +INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) + +char RISCVCodeGenPrepare::ID = 0; + +FunctionPass *llvm::createRISCVCodeGenPreparePass() { + return new RISCVCodeGenPrepare(); +} diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 5b823af1e9b8..d5826b46d738 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -690,6 +690,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // 32 trailing ones should use srliw via tablegen pattern. if (TrailingOnes == 32 || ShAmt >= TrailingOnes) break; + // If C2 is (1 << ShAmt) use bexti if possible. 
+ if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) { + SDNode *BEXTI = + CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(ShAmt, DL, VT)); + ReplaceNode(Node, BEXTI); + return; + } unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; SDNode *SLLI = CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), @@ -939,18 +947,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (!isMask_64(C2)) break; - // This should be the only use of the AND unless we will use - // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND - // constants. - if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) - break; - - // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this - // optimization. - if (isInt<12>(C2) || + // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT + // has multiple users or the constant is a simm12. This prevents inserting + // a shift and still have uses of the AND/ZEXT. Shifting a simm12 will + // likely make it more costly to materialize. Otherwise, using a SLLI + // might allow it to be compressed. + bool IsANDIOrZExt = + isInt<12>(C2) || (C2 == UINT64_C(0xFFFF) && (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || - (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) + (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()); + if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) break; // We need to shift left the AND input and C1 by a total of XLen bits. 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 658865703079..1702546b58a6 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -215,21 +215,26 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MULO_I64, nullptr); } - if (!Subtarget.hasStdExtM()) { - setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV, - ISD::SREM, ISD::UREM}, - XLenVT, Expand); + if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) { + setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand); } else { if (Subtarget.is64Bit()) { setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom); - - setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, - {MVT::i8, MVT::i16, MVT::i32}, Custom); } else { setOperationAction(ISD::MUL, MVT::i64, Custom); } } + if (!Subtarget.hasStdExtM()) { + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, + XLenVT, Expand); + } else { + if (Subtarget.is64Bit()) { + setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM}, + {MVT::i8, MVT::i16, MVT::i32}, Custom); + } + } + setOperationAction( {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT, Expand); @@ -294,7 +299,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT, XLenVT, Custom); } - static constexpr ISD::NodeType FPLegalNodeTypes[] = { + static const unsigned FPLegalNodeTypes[] = { ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND, @@ -307,7 +312,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO}; - static const ISD::NodeType FPOpToExpand[] = 
{ + static const unsigned FPOpToExpand[] = { ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; @@ -315,8 +320,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i16, Custom); if (Subtarget.hasStdExtZfh()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f16, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f16, Expand); @@ -340,14 +344,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, } if (Subtarget.hasStdExtF()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f32, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f32, Legal); setCondCodeAction(FPCCToExpand, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f32, Expand); + setOperationAction(FPOpToExpand, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); } @@ -356,8 +358,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Custom); if (Subtarget.hasStdExtD()) { - for (auto NT : FPLegalNodeTypes) - setOperationAction(NT, MVT::f64, Legal); + setOperationAction(FPLegalNodeTypes, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); setCondCodeAction(FPCCToExpand, MVT::f64, Expand); @@ -366,8 +367,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 
setTruncStoreAction(MVT::f64, MVT::f32, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f64, Expand); + setOperationAction(FPOpToExpand, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); } @@ -458,17 +458,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}; + static const unsigned IntegerVecReduceOps[] = { + ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, + ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}; + + static const unsigned FloatingPointVecReduceOps[] = { + ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN, + ISD::VECREDUCE_FMAX}; + if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector // element type being illegal. setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, MVT::i64, Custom); - setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, - ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, - ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, - ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, - MVT::i64, Custom); + setOperationAction(IntegerVecReduceOps, MVT::i64, Custom); setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, @@ -581,11 +586,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Custom-lower reduction operations to set up the corresponding custom // nodes' operands. 
- setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, - ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, - ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, - ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN}, - VT, Custom); + setOperationAction(IntegerVecReduceOps, VT, Custom); setOperationAction(IntegerVPOps, VT, Custom); @@ -661,9 +662,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND}, VT, Custom); - setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, - ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX}, - VT, Custom); + setOperationAction(FloatingPointVecReduceOps, VT, Custom); // Expand FP operations that need libcalls. setOperationAction(ISD::FREM, VT, Expand); @@ -905,17 +904,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND}, VT, Custom); - for (auto CC : VFPCCToExpand) - setCondCodeAction(CC, VT, Expand); + setCondCodeAction(VFPCCToExpand, VT, Expand); setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::BITCAST, VT, Custom); - setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, - ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX}, - VT, Custom); + setOperationAction(FloatingPointVecReduceOps, VT, Custom); setOperationAction(FloatingPointVPOps, VT, Custom); } @@ -943,7 +939,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setJumpIsExpensive(); setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR}); + ISD::OR, ISD::XOR, ISD::SETCC}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -1374,6 +1370,23 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context // with 1/-1. 
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG) { + // If this is a single bit test that can't be handled by ANDI, shift the + // bit to be tested to the MSB and perform a signed compare with 0. + if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && + LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && + isa<ConstantSDNode>(LHS.getOperand(1))) { + uint64_t Mask = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) { + CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; + unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); + LHS = LHS.getOperand(0); + if (ShAmt != 0) + LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, + DAG.getConstant(ShAmt, DL, LHS.getValueType())); + return; + } + } + // Convert X > -1 to X >= 0. if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { RHS = DAG.getConstant(0, DL, RHS.getValueType()); @@ -3707,10 +3720,7 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SDLoc DL(Op); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - - const GlobalValue *GV = N->getGlobal(); - bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); - return getAddr(N, DAG, IsLocal); + return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, @@ -8130,6 +8140,50 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) { return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false); } +// Replace (seteq (i64 (and X, 0xffffffff)), C1) with +// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from +// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg +// can become a sext.w instead of a shift pair. 
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + if (OpVT != MVT::i64 || !Subtarget.is64Bit()) + return SDValue(); + + // RHS needs to be a constant. + auto *N1C = dyn_cast<ConstantSDNode>(N1); + if (!N1C) + return SDValue(); + + // LHS needs to be (and X, 0xffffffff). + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || + !isa<ConstantSDNode>(N0.getOperand(1)) || + N0.getConstantOperandVal(1) != UINT64_C(0xffffffff)) + return SDValue(); + + // Looking for an equality compare. + ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); + if (!isIntEqualitySetCC(Cond)) + return SDValue(); + + const APInt &C1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + + SDLoc dl(N); + // If the constant is larger than 2^32 - 1 it is impossible for both sides + // to be equal. + if (C1.getActiveBits() > 32) + return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); + + SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT, + N0.getOperand(0), DAG.getValueType(MVT::i32)); + return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64), + dl, OpVT), Cond); +} + static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { @@ -8658,6 +8712,75 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(32 - ShAmt, DL, MVT::i64)); } +// Perform common combines for BR_CC and SELECT_CC conditions. +static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, + SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { + ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get(); + if (!ISD::isIntEqualitySetCC(CCVal)) + return false; + + // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt) + // Sometimes the setcc is introduced after br_cc/select_cc has been formed. 
+ if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && + LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { + // If we're looking for eq 0 instead of ne 0, we need to invert the + // condition. + bool Invert = CCVal == ISD::SETEQ; + CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); + if (Invert) + CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); + + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); + + CC = DAG.getCondCode(CCVal); + return true; + } + + // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne) + if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) { + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + return true; + } + + // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt) + if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::Constant) { + SDValue LHS0 = LHS.getOperand(0); + if (LHS0.getOpcode() == ISD::AND && + LHS0.getOperand(1).getOpcode() == ISD::Constant) { + uint64_t Mask = LHS0.getConstantOperandVal(1); + uint64_t ShAmt = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) { + CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; + CC = DAG.getCondCode(CCVal); + + ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt; + LHS = LHS0.getOperand(0); + if (ShAmt != 0) + LHS = + DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0), + DAG.getConstant(ShAmt, DL, LHS.getValueType())); + return true; + } + } + } + + // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1. + // This can occur when legalizing some floating point comparisons. 
+ APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); + if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { + CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); + CC = DAG.getCondCode(CCVal); + RHS = DAG.getConstant(0, DL, LHS.getValueType()); + return true; + } + + return false; +} + SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8872,6 +8995,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::FMAXNUM: case ISD::FMINNUM: return combineBinOpToReduce(N, DAG); + case ISD::SETCC: + return performSETCCCombine(N, DAG, Subtarget); case ISD::SIGN_EXTEND_INREG: return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); case ISD::ZERO_EXTEND: @@ -8900,110 +9025,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Transform SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue CC = N->getOperand(2); SDValue TrueV = N->getOperand(3); SDValue FalseV = N->getOperand(4); + SDLoc DL(N); // If the True and False values are the same, we don't need a select_cc. if (TrueV == FalseV) return TrueV; - ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); - if (!ISD::isIntEqualitySetCC(CCVal)) - break; - - // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) -> - // (select_cc X, Y, lt, trueV, falseV) - // Sometimes the setcc is introduced after select_cc has been formed. - if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && - LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { - // If we're looking for eq 0 instead of ne 0, we need to invert the - // condition. 
- bool Invert = CCVal == ISD::SETEQ; - CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); - if (Invert) - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - - SDLoc DL(N); - RHS = LHS.getOperand(1); - LHS = LHS.getOperand(0); - translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); - - SDValue TargetCC = DAG.getCondCode(CCVal); - return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), - {LHS, RHS, TargetCC, TrueV, FalseV}); - } - - // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) -> - // (select_cc X, Y, eq/ne, trueV, falseV) - if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) - return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0), - {LHS.getOperand(0), LHS.getOperand(1), - N->getOperand(2), TrueV, FalseV}); - // (select_cc X, 1, setne, trueV, falseV) -> - // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1. - // This can occur when legalizing some floating point comparisons. - APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); - if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { - SDLoc DL(N); - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - SDValue TargetCC = DAG.getCondCode(CCVal); - RHS = DAG.getConstant(0, DL, LHS.getValueType()); + if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0), - {LHS, RHS, TargetCC, TrueV, FalseV}); - } + {LHS, RHS, CC, TrueV, FalseV}); - break; + return SDValue(); } case RISCVISD::BR_CC: { SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get(); - if (!ISD::isIntEqualitySetCC(CCVal)) - break; - - // Fold (br_cc (setlt X, Y), 0, ne, dest) -> - // (br_cc X, Y, lt, dest) - // Sometimes the setcc is introduced after br_cc has been formed. 
- if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) && - LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) { - // If we're looking for eq 0 instead of ne 0, we need to invert the - // condition. - bool Invert = CCVal == ISD::SETEQ; - CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get(); - if (Invert) - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - - SDLoc DL(N); - RHS = LHS.getOperand(1); - LHS = LHS.getOperand(0); - translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG); + SDValue CC = N->getOperand(3); + SDLoc DL(N); + if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget)) return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), - N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal), - N->getOperand(4)); - } - - // Fold (br_cc (xor X, Y), 0, eq/ne, dest) -> - // (br_cc X, Y, eq/ne, trueV, falseV) - if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) - return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0), - N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1), - N->getOperand(3), N->getOperand(4)); - - // (br_cc X, 1, setne, br_cc) -> - // (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1. - // This can occur when legalizing some floating point comparisons. 
- APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1); - if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) { - SDLoc DL(N); - CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType()); - SDValue TargetCC = DAG.getCondCode(CCVal); - RHS = DAG.getConstant(0, DL, LHS.getValueType()); - return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0), - N->getOperand(0), LHS, RHS, TargetCC, - N->getOperand(4)); - } - break; + N->getOperand(0), LHS, RHS, CC, N->getOperand(4)); + + return SDValue(); } case ISD::BITREVERSE: return performBITREVERSECombine(N, DAG, Subtarget); @@ -9299,6 +9346,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, bool RISCVTargetLowering::isDesirableToCommuteWithShift( const SDNode *N, CombineLevel Level) const { + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + // The following folds are only desirable if `(OP _, c1 << c2)` can be // materialised in fewer instructions than `(OP _, c1)`: // @@ -9357,7 +9408,8 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( return false; // Only handle AND for now. 
- if (Op.getOpcode() != ISD::AND) + unsigned Opcode = Op.getOpcode(); + if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR) return false; ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -9376,12 +9428,13 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool { return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask); }; - auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool { + auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool { if (NewMask == Mask) return true; SDLoc DL(Op); - SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); - SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); + SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType()); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), + Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); }; @@ -9390,18 +9443,21 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( if (ShrunkMask.isSignedIntN(12)) return false; - // Preserve (and X, 0xffff) when zext.h is supported. - if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { - APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); - if (IsLegalMask(NewMask)) - return UseMask(NewMask); - } + // And has a few special cases for zext. + if (Opcode == ISD::AND) { + // Preserve (and X, 0xffff) when zext.h is supported. + if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) { + APInt NewMask = APInt(Mask.getBitWidth(), 0xffff); + if (IsLegalMask(NewMask)) + return UseMask(NewMask); + } - // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. - if (VT == MVT::i64) { - APInt NewMask = APInt(64, 0xffffffff); - if (IsLegalMask(NewMask)) - return UseMask(NewMask); + // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern. 
+ if (VT == MVT::i64) { + APInt NewMask = APInt(64, 0xffffffff); + if (IsLegalMask(NewMask)) + return UseMask(NewMask); + } } // For the remaining optimizations, we need to be able to make a negative @@ -9414,10 +9470,11 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant( // Try to make a 12 bit negative immediate. If that fails try to make a 32 // bit negative immediate unless the shrunk immediate already fits in 32 bits. + // If we can't create a simm12, we shouldn't change opaque constants. APInt NewMask = ShrunkMask; if (MinSignedBits <= 12) NewMask.setBitsFrom(11); - else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) + else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32)) NewMask.setBitsFrom(31); else return false; @@ -10015,15 +10072,15 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, LastSelectPseudo = &*SequenceMBBI; SequenceMBBI->collectDebugValues(SelectDebugValues); SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); - } else { - if (SequenceMBBI->hasUnmodeledSideEffects() || - SequenceMBBI->mayLoadOrStore()) - break; - if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { - return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); - })) - break; + continue; } + if (SequenceMBBI->hasUnmodeledSideEffects() || + SequenceMBBI->mayLoadOrStore()) + break; + if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); + })) + break; } const RISCVInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -12159,7 +12216,8 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { // FIXME: This doesn't work for zve32, but that's already broken // elsewhere for the same reason. 
assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); - assert(RISCV::RVVBitsPerBlock == 64 && "RVVBitsPerBlock changed, audit needed"); + static_assert(RISCV::RVVBitsPerBlock == 64, + "RVVBitsPerBlock changed, audit needed"); return true; } @@ -12214,10 +12272,12 @@ bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { // Check integral scalar types. + const bool HasExtMOrZmmul = + Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul(); if (VT.isScalarInteger()) { // Omit the optimization if the sub target has the M extension and the data // size exceeds XLen. - if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) + if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen()) return false; if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { // Break the MUL to a SLLI and an ADD/SUB. @@ -12232,7 +12292,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, return true; // Omit the following optimization if the sub target has the M extension // and the data size >= XLen. - if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) + if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen()) return false; // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs // a pair of LUI/ADDI. 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 685604ad9a59..75a79895330f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -637,6 +637,64 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } } +MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // The below optimizations narrow the load so they are only valid for little + // endian. + // TODO: Support big endian by adding an offset into the frame object? + if (MF.getDataLayout().isBigEndian()) + return nullptr; + + // Fold load from stack followed by sext.w into lw. + // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w? 
+ if (Ops.size() != 1 || Ops[0] != 1) + return nullptr; + + unsigned LoadOpc; + switch (MI.getOpcode()) { + default: + if (RISCV::isSEXT_W(MI)) { + LoadOpc = RISCV::LW; + break; + } + if (RISCV::isZEXT_W(MI)) { + LoadOpc = RISCV::LWU; + break; + } + if (RISCV::isZEXT_B(MI)) { + LoadOpc = RISCV::LBU; + break; + } + return nullptr; + case RISCV::SEXT_H: + LoadOpc = RISCV::LH; + break; + case RISCV::SEXT_B: + LoadOpc = RISCV::LB; + break; + case RISCV::ZEXT_H_RV32: + case RISCV::ZEXT_H_RV64: + LoadOpc = RISCV::LHU; + break; + } + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIndex), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlign(FrameIndex)); + + Register DstReg = MI.getOperand(0).getReg(); + return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), + DstReg) + .addFrameIndex(FrameIndex) + .addImm(0) + .addMemOperand(MMO); +} + void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, @@ -1799,17 +1857,30 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .addReg(VL, RegState::Kill) .addImm(ShiftAmount) .setMIFlag(Flag); - } else if ((NumOfVReg == 3 || NumOfVReg == 5 || NumOfVReg == 9) && - STI.hasStdExtZba()) { - // We can use Zba SHXADD instructions for multiply in some cases. - // TODO: Generalize to SHXADD+SLLI. + } else if (STI.hasStdExtZba() && + ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || + (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || + (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { + // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 
+ unsigned Opc; - switch (NumOfVReg) { - default: llvm_unreachable("Unexpected number of vregs"); - case 3: Opc = RISCV::SH1ADD; break; - case 5: Opc = RISCV::SH2ADD; break; - case 9: Opc = RISCV::SH3ADD; break; + uint32_t ShiftAmount; + if (NumOfVReg % 9 == 0) { + Opc = RISCV::SH3ADD; + ShiftAmount = Log2_64(NumOfVReg / 9); + } else if (NumOfVReg % 5 == 0) { + Opc = RISCV::SH2ADD; + ShiftAmount = Log2_64(NumOfVReg / 5); + } else if (NumOfVReg % 3 == 0) { + Opc = RISCV::SH1ADD; + ShiftAmount = Log2_64(NumOfVReg / 3); + } else { + llvm_unreachable("Unexpected number of vregs"); } + if (ShiftAmount) + BuildMI(MBB, II, DL, get(RISCV::SLLI), VL) + .addReg(VL, RegState::Kill) + .addImm(ShiftAmount) + .setMIFlag(Flag); BuildMI(MBB, II, DL, get(Opc), VL) .addReg(VL, RegState::Kill) .addReg(VL) @@ -1839,10 +1910,11 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, } else { Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); movImm(MBB, II, DL, N, NumOfVReg, Flag); - if (!STI.hasStdExtM()) + if (!STI.hasStdExtM() && !STI.hasStdExtZmmul()) MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ MF.getFunction(), - "M-extension must be enabled to calculate the vscaled size/offset."}); + "M- or Zmmul-extension must be enabled to calculate the vscaled size/" + "offset."}); BuildMI(MBB, II, DL, get(RISCV::MUL), VL) .addReg(VL, RegState::Kill) .addReg(N, RegState::Kill) @@ -1852,6 +1924,24 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, return VL; } +// Returns true if this is the sext.w pattern, addiw rd, rs1, 0. +bool RISCV::isSEXT_W(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; +} + +// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0. 
+bool RISCV::isZEXT_W(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && + MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; +} + +// Returns true if this is the zext.b pattern, andi rd, rs1, 255. +bool RISCV::isZEXT_B(const MachineInstr &MI) { + return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && + MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; +} + static bool isRVVWholeLoadStore(unsigned Opcode) { switch (Opcode) { default: diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 5368437618bd..4aa9ded5b3a2 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -69,6 +69,14 @@ public: int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + using TargetInstrInfo::foldMemoryOperandImpl; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; + // Materializes the given integer Val into DstReg. void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DstReg, uint64_t Val, @@ -183,6 +191,11 @@ protected: namespace RISCV { +// Returns true if this is the sext.w pattern, addiw rd, rs1, 0. +bool isSEXT_W(const MachineInstr &MI); +bool isZEXT_W(const MachineInstr &MI); +bool isZEXT_B(const MachineInstr &MI); + // Returns true if the given MI is an RVV instruction opcode for which we may // expect to see a FrameIndex operand. 
bool isRVVSpill(const MachineInstr &MI); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 06a90438838e..78fd09fbf387 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1278,6 +1278,13 @@ def : Pat<(setgt GPR:$rs1, simm12_minus1_nonzero:$imm), def : Pat<(setugt GPR:$rs1, simm12_minus1_nonzero:$imm), (XORI (SLTIU GPR:$rs1, (ImmPlus1 simm12_minus1_nonzero:$imm)), 1)>; +// If negating a pattern that requires an XORI above, we can fold the XORI with +// the NEG. The XORI is equivalent to 1-X and negating gives X-1. +def : Pat<(ineg (setuge GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs1, GPR:$rs2), -1)>; +def : Pat<(ineg (setule GPR:$rs1, GPR:$rs2)), (ADDI (SLTU GPR:$rs2, GPR:$rs1), -1)>; +def : Pat<(ineg (setge GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs1, GPR:$rs2), -1)>; +def : Pat<(ineg (setle GPR:$rs1, GPR:$rs2)), (ADDI (SLT GPR:$rs2, GPR:$rs1), -1)>; + def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); RISCVCC::CondCode BrCC = getRISCVCCForIntCC(CC); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index 72ba8460116f..662604b138d2 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -24,7 +24,7 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>; // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtM] in { +let Predicates = [HasStdExtMOrZmmul] in { def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>, @@ -33,6 +33,9 @@ def MULHSU : ALU_rr<0b0000001, 
0b010, "mulhsu">, Sched<[WriteIMul, ReadIMul, ReadIMul]>; def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>, Sched<[WriteIMul, ReadIMul, ReadIMul]>; +} // Predicates = [HasStdExtMOrZmmul] + +let Predicates = [HasStdExtM] in { def DIV : ALU_rr<0b0000001, 0b100, "div">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; def DIVU : ALU_rr<0b0000001, 0b101, "divu">, @@ -43,9 +46,12 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">, Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; } // Predicates = [HasStdExtM] -let Predicates = [HasStdExtM, IsRV64] in { +let Predicates = [HasStdExtMOrZmmul, IsRV64] in { def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>, Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>; +} // Predicates = [HasStdExtMOrZmmul, IsRV64] + +let Predicates = [HasStdExtM, IsRV64] in { def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">, @@ -60,21 +66,25 @@ def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtM] in { +let Predicates = [HasStdExtMOrZmmul] in { def : PatGprGpr<mul, MUL>; def : PatGprGpr<mulhs, MULH>; def : PatGprGpr<mulhu, MULHU>; def : PatGprGpr<riscv_mulhsu, MULHSU>; +} // Predicates = [HasStdExtMOrZmmul] + +let Predicates = [HasStdExtM] in { def : PatGprGpr<sdiv, DIV>; def : PatGprGpr<udiv, DIVU>; def : PatGprGpr<srem, REM>; def : PatGprGpr<urem, REMU>; } // Predicates = [HasStdExtM] -let Predicates = [HasStdExtM, IsRV64] in { // Select W instructions if only the lower 32-bits of the result are used. 
+let Predicates = [HasStdExtMOrZmmul, IsRV64] in def : PatGprGpr<binop_allwusers<mul>, MULW>; +let Predicates = [HasStdExtM, IsRV64] in { def : PatGprGpr<riscv_divw, DIVW>; def : PatGprGpr<riscv_divuw, DIVUW>; def : PatGprGpr<riscv_remuw, REMUW>; @@ -96,11 +106,11 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))), (REMW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtM, IsRV64] -let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in { +let Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] in { // Special case for calculating the full 64-bit product of a 32x32 unsigned // multiply where the inputs aren't known to be zero extended. We can shift the // inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish // zeroing the upper 32 bits. def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))), (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>; -} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] +} // Predicates = [HasStdExtMOrZmmul, IsRV64, NotHasStdExtZba] diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index dadf8f81a2c0..920729e9ebbf 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -443,8 +443,7 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*I++; // We're looking for the sext.w pattern ADDIW rd, rs1, 0. - if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() || - MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg()) + if (!RISCV::isSEXT_W(*MI)) continue; // Input should be a virtual register. 
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 7589b44b81d3..0446edefa979 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -202,11 +202,9 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const { } bool RISCVSubtarget::enableSubRegLiveness() const { - if (EnableSubRegLiveness.getNumOccurrences()) - return EnableSubRegLiveness; - // Enable subregister liveness for RVV to better handle LMUL>1 and segment - // load/store. - return hasVInstructions(); + // FIXME: Enable subregister liveness by default for RVV to better handle + // LMUL>1 and segment load/store. + return EnableSubRegLiveness; } void RISCVSubtarget::getPostRAMutations( diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h index 831f7fadaa62..6eb949fa551c 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -89,6 +89,7 @@ private: bool HasStdExtZicbom = false; bool HasStdExtZicboz = false; bool HasStdExtZicbop = false; + bool HasStdExtZmmul = false; bool HasRV64 = false; bool IsRV32E = false; bool EnableLinkerRelax = false; @@ -184,6 +185,7 @@ public: bool hasStdExtZicbom() const { return HasStdExtZicbom; } bool hasStdExtZicboz() const { return HasStdExtZicboz; } bool hasStdExtZicbop() const { return HasStdExtZicbop; } + bool hasStdExtZmmul() const { return HasStdExtZmmul; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } bool enableLinkerRelax() const { return EnableLinkerRelax; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index b2707b753e87..50fcb00e6c63 100644 --- 
a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -49,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeGlobalISel(*PR); initializeRISCVMakeCompressibleOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); + initializeRISCVCodeGenPreparePass(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVSExtWRemovalPass(*PR); initializeRISCVExpandPseudoPass(*PR); @@ -187,7 +188,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandPass()); - addPass(createRISCVGatherScatterLoweringPass()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createRISCVGatherScatterLoweringPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createRISCVCodeGenPreparePass()); TargetPassConfig::addIRPasses(); } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 29d3c5e491de..f9cd5ffb512b 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -65,7 +65,7 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb()) return TTI::TCC_Free; // zext.w - if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZbb()) + if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba()) return TTI::TCC_Free; LLVM_FALLTHROUGH; case Instruction::Add: @@ -198,6 +198,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // vid.v v9 // vrsub.vx v10, v9, a0 // vrgather.vv v9, v8, v10 + if (Tp->getElementType()->isIntegerTy(1)) + // Mask operation additionally required extend and truncate + return LT.first * 9; return LT.first * 6; } } diff --git 
a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp index 1a3e35a5f901..220fd76305aa 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.cpp @@ -1068,5 +1068,15 @@ StringRef getKernelProfilingInfoName(KernelProfilingInfo e) { } llvm_unreachable("Unexpected operand"); } + +std::string getExtInstSetName(InstructionSet e) { + switch (e) { + CASE(InstructionSet, OpenCL_std) + CASE(InstructionSet, GLSL_std_450) + CASE(InstructionSet, SPV_AMD_shader_trinary_minmax) + break; + } + llvm_unreachable("Unexpected operand"); +} } // namespace SPIRV } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h index 2aa9f076c78e..9482723993a2 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVBaseInfo.h @@ -706,6 +706,19 @@ enum class KernelProfilingInfo : uint32_t { CmdExecTime = 0x1, }; StringRef getKernelProfilingInfoName(KernelProfilingInfo e); + +enum class InstructionSet : uint32_t { + OpenCL_std = 0, + GLSL_std_450 = 1, + SPV_AMD_shader_trinary_minmax = 2, +}; +std::string getExtInstSetName(InstructionSet e); + +// TODO: implement other mnemonics. 
+enum class Opcode : uint32_t { + InBoundsPtrAccessChain = 70, + PtrCastToGeneric = 121, +}; } // namespace SPIRV } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 3105baa02c90..d60e61f36270 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -59,7 +59,7 @@ void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI, } void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) { - llvm_unreachable("Unimplemented recordOpExtInstImport"); + // TODO: insert {Reg, Set} into ExtInstSetIDs map. } void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, @@ -176,7 +176,18 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, } void SPIRVInstPrinter::printOpExtInst(const MCInst *MI, raw_ostream &O) { - llvm_unreachable("Unimplemented printOpExtInst"); + // The fixed operands have already been printed, so just need to decide what + // type of ExtInst operands to print based on the instruction set and number. + MCInstrDesc MCDesc = MII.get(MI->getOpcode()); + unsigned NumFixedOps = MCDesc.getNumOperands(); + const auto NumOps = MI->getNumOperands(); + if (NumOps == NumFixedOps) + return; + + O << ' '; + + // TODO: implement special printing for OpenCLExtInst::vstor*. 
+ printRemainingVariableOps(MI, NumFixedOps, O, true); } void SPIRVInstPrinter::printOpDecorate(const MCInst *MI, raw_ostream &O) { diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h index 8da54a5d6e61..5a7f2e51afb8 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRV.h @@ -19,6 +19,7 @@ class SPIRVSubtarget; class InstructionSelector; class RegisterBankInfo; +ModulePass *createSPIRVPrepareFunctionsPass(); FunctionPass *createSPIRVPreLegalizerPass(); FunctionPass *createSPIRVEmitIntrinsicsPass(SPIRVTargetMachine *TM); InstructionSelector * diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 605bf949187f..6d60bd5e3c97 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "SPIRVUtils.h" #include "TargetInfo/SPIRVTargetInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -58,9 +59,14 @@ public: void outputModuleSection(SPIRV::ModuleSectionType MSType); void outputEntryPoints(); void outputDebugSourceAndStrings(const Module &M); + void outputOpExtInstImports(const Module &M); void outputOpMemoryModel(); void outputOpFunctionEnd(); void outputExtFuncDecls(); + void outputExecutionModeFromMDNode(Register Reg, MDNode *Node, + SPIRV::ExecutionMode EM); + void outputExecutionMode(const Module &M); + void outputAnnotations(const Module &M); void outputModuleSections(); void emitInstruction(const MachineInstr *MI) override; @@ -127,6 +133,8 @@ void SPIRVAsmPrinter::emitFunctionBodyEnd() { } void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) { + if 
(MAI->MBBsToSkip.contains(&MBB)) + return; MCInst LabelInst; LabelInst.setOpcode(SPIRV::OpLabel); LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB))); @@ -237,6 +245,13 @@ void SPIRVAsmPrinter::outputModuleSection(SPIRV::ModuleSectionType MSType) { } void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) { + // Output OpSourceExtensions. + for (auto &Str : MAI->SrcExt) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpSourceExtension); + addStringImm(Str.first(), Inst); + outputMCInst(Inst); + } // Output OpSource. MCInst Inst; Inst.setOpcode(SPIRV::OpSource); @@ -246,6 +261,19 @@ void SPIRVAsmPrinter::outputDebugSourceAndStrings(const Module &M) { outputMCInst(Inst); } +void SPIRVAsmPrinter::outputOpExtInstImports(const Module &M) { + for (auto &CU : MAI->ExtInstSetMap) { + unsigned Set = CU.first; + Register Reg = CU.second; + MCInst Inst; + Inst.setOpcode(SPIRV::OpExtInstImport); + Inst.addOperand(MCOperand::createReg(Reg)); + addStringImm(getExtInstSetName(static_cast<SPIRV::InstructionSet>(Set)), + Inst); + outputMCInst(Inst); + } +} + void SPIRVAsmPrinter::outputOpMemoryModel() { MCInst Inst; Inst.setOpcode(SPIRV::OpMemoryModel); @@ -301,6 +329,135 @@ void SPIRVAsmPrinter::outputExtFuncDecls() { } } +// Encode LLVM type by SPIR-V execution mode VecTypeHint. 
+static unsigned encodeVecTypeHint(Type *Ty) { + if (Ty->isHalfTy()) + return 4; + if (Ty->isFloatTy()) + return 5; + if (Ty->isDoubleTy()) + return 6; + if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) { + switch (IntTy->getIntegerBitWidth()) { + case 8: + return 0; + case 16: + return 1; + case 32: + return 2; + case 64: + return 3; + default: + llvm_unreachable("invalid integer type"); + } + } + if (FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty)) { + Type *EleTy = VecTy->getElementType(); + unsigned Size = VecTy->getNumElements(); + return Size << 16 | encodeVecTypeHint(EleTy); + } + llvm_unreachable("invalid type"); +} + +static void addOpsFromMDNode(MDNode *MDN, MCInst &Inst, + SPIRV::ModuleAnalysisInfo *MAI) { + for (const MDOperand &MDOp : MDN->operands()) { + if (auto *CMeta = dyn_cast<ConstantAsMetadata>(MDOp)) { + Constant *C = CMeta->getValue(); + if (ConstantInt *Const = dyn_cast<ConstantInt>(C)) { + Inst.addOperand(MCOperand::createImm(Const->getZExtValue())); + } else if (auto *CE = dyn_cast<Function>(C)) { + Register FuncReg = MAI->getFuncReg(CE->getName().str()); + assert(FuncReg.isValid()); + Inst.addOperand(MCOperand::createReg(FuncReg)); + } + } + } +} + +void SPIRVAsmPrinter::outputExecutionModeFromMDNode(Register Reg, MDNode *Node, + SPIRV::ExecutionMode EM) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createImm(static_cast<unsigned>(EM))); + addOpsFromMDNode(Node, Inst, MAI); + outputMCInst(Inst); +} + +void SPIRVAsmPrinter::outputExecutionMode(const Module &M) { + NamedMDNode *Node = M.getNamedMetadata("spirv.ExecutionMode"); + if (Node) { + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + addOpsFromMDNode(cast<MDNode>(Node->getOperand(i)), Inst, MAI); + outputMCInst(Inst); + } + } + for (auto FI = M.begin(), E = M.end(); FI != E; ++FI) { + const Function &F = *FI; + if 
(F.isDeclaration()) + continue; + Register FReg = MAI->getFuncReg(F.getGlobalIdentifier()); + assert(FReg.isValid()); + if (MDNode *Node = F.getMetadata("reqd_work_group_size")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::LocalSize); + if (MDNode *Node = F.getMetadata("work_group_size_hint")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::LocalSizeHint); + if (MDNode *Node = F.getMetadata("intel_reqd_sub_group_size")) + outputExecutionModeFromMDNode(FReg, Node, + SPIRV::ExecutionMode::SubgroupSize); + if (MDNode *Node = F.getMetadata("vec_type_hint")) { + MCInst Inst; + Inst.setOpcode(SPIRV::OpExecutionMode); + Inst.addOperand(MCOperand::createReg(FReg)); + unsigned EM = static_cast<unsigned>(SPIRV::ExecutionMode::VecTypeHint); + Inst.addOperand(MCOperand::createImm(EM)); + unsigned TypeCode = encodeVecTypeHint(getMDOperandAsType(Node, 0)); + Inst.addOperand(MCOperand::createImm(TypeCode)); + outputMCInst(Inst); + } + } +} + +void SPIRVAsmPrinter::outputAnnotations(const Module &M) { + outputModuleSection(SPIRV::MB_Annotations); + // Process llvm.global.annotations special global variable. + for (auto F = M.global_begin(), E = M.global_end(); F != E; ++F) { + if ((*F).getName() != "llvm.global.annotations") + continue; + const GlobalVariable *V = &(*F); + const ConstantArray *CA = cast<ConstantArray>(V->getOperand(0)); + for (Value *Op : CA->operands()) { + ConstantStruct *CS = cast<ConstantStruct>(Op); + // The first field of the struct contains a pointer to + // the annotated variable. + Value *AnnotatedVar = CS->getOperand(0)->stripPointerCasts(); + if (!isa<Function>(AnnotatedVar)) + llvm_unreachable("Unsupported value in llvm.global.annotations"); + Function *Func = cast<Function>(AnnotatedVar); + Register Reg = MAI->getFuncReg(Func->getGlobalIdentifier()); + + // The second field contains a pointer to a global annotation string. 
+ GlobalVariable *GV = + cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts()); + + StringRef AnnotationString; + getConstantStringInfo(GV, AnnotationString); + MCInst Inst; + Inst.setOpcode(SPIRV::OpDecorate); + Inst.addOperand(MCOperand::createReg(Reg)); + unsigned Dec = static_cast<unsigned>(SPIRV::Decoration::UserSemantic); + Inst.addOperand(MCOperand::createImm(Dec)); + addStringImm(AnnotationString, Inst); + outputMCInst(Inst); + } + } +} + void SPIRVAsmPrinter::outputModuleSections() { const Module *M = MMI->getModule(); // Get the global subtarget to output module-level info. @@ -311,13 +468,14 @@ void SPIRVAsmPrinter::outputModuleSections() { // Output instructions according to the Logical Layout of a Module: // TODO: 1,2. All OpCapability instructions, then optional OpExtension // instructions. - // TODO: 3. Optional OpExtInstImport instructions. + // 3. Optional OpExtInstImport instructions. + outputOpExtInstImports(*M); // 4. The single required OpMemoryModel instruction. outputOpMemoryModel(); // 5. All entry point declarations, using OpEntryPoint. outputEntryPoints(); // 6. Execution-mode declarations, using OpExecutionMode or OpExecutionModeId. - // TODO: + outputExecutionMode(*M); // 7a. Debug: all OpString, OpSourceExtension, OpSource, and // OpSourceContinued, without forward references. outputDebugSourceAndStrings(*M); @@ -326,7 +484,7 @@ void SPIRVAsmPrinter::outputModuleSections() { // 7c. Debug: all OpModuleProcessed instructions. outputModuleSection(SPIRV::MB_DebugModuleProcessed); // 8. All annotation instructions (all decorations). - outputModuleSection(SPIRV::MB_Annotations); + outputAnnotations(*M); // 9. All type declarations (OpTypeXXX instructions), all constant // instructions, and all global variable declarations. 
This section is // the first section to allow use of: OpLine and OpNoLine debug information; diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 5b6b82aebf30..e8fedfeffde7 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -24,9 +24,8 @@ using namespace llvm; SPIRVCallLowering::SPIRVCallLowering(const SPIRVTargetLowering &TLI, - const SPIRVSubtarget &ST, SPIRVGlobalRegistry *GR) - : CallLowering(&TLI), ST(ST), GR(GR) {} + : CallLowering(&TLI), GR(GR) {} bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<Register> VRegs, @@ -36,11 +35,13 @@ bool SPIRVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. if (VRegs.size() > 1) return false; - if (Val) + if (Val) { + const auto &STI = MIRBuilder.getMF().getSubtarget(); return MIRBuilder.buildInstr(SPIRV::OpReturnValue) .addUse(VRegs[0]) - .constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), - *ST.getRegBankInfo()); + .constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); + } MIRBuilder.buildInstr(SPIRV::OpReturn); return true; } @@ -63,6 +64,56 @@ static uint32_t getFunctionControl(const Function &F) { return FuncControl; } +static ConstantInt *getConstInt(MDNode *MD, unsigned NumOp) { + if (MD->getNumOperands() > NumOp) { + auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(NumOp)); + if (CMeta) + return dyn_cast<ConstantInt>(CMeta->getValue()); + } + return nullptr; +} + +// This code restores function args/retvalue types for composite cases +// because the final types should still be aggregate whereas they're i32 +// during the translation to cope with aggregate flattening etc. 
+static FunctionType *getOriginalFunctionType(const Function &F) { + auto *NamedMD = F.getParent()->getNamedMetadata("spv.cloned_funcs"); + if (NamedMD == nullptr) + return F.getFunctionType(); + + Type *RetTy = F.getFunctionType()->getReturnType(); + SmallVector<Type *, 4> ArgTypes; + for (auto &Arg : F.args()) + ArgTypes.push_back(Arg.getType()); + + auto ThisFuncMDIt = + std::find_if(NamedMD->op_begin(), NamedMD->op_end(), [&F](MDNode *N) { + return isa<MDString>(N->getOperand(0)) && + cast<MDString>(N->getOperand(0))->getString() == F.getName(); + }); + // TODO: probably one function can have numerous type mutations, + // so we should support this. + if (ThisFuncMDIt != NamedMD->op_end()) { + auto *ThisFuncMD = *ThisFuncMDIt; + MDNode *MD = dyn_cast<MDNode>(ThisFuncMD->getOperand(1)); + assert(MD && "MDNode operand is expected"); + ConstantInt *Const = getConstInt(MD, 0); + if (Const) { + auto *CMeta = dyn_cast<ConstantAsMetadata>(MD->getOperand(1)); + assert(CMeta && "ConstantAsMetadata operand is expected"); + assert(Const->getSExtValue() >= -1); + // Currently -1 indicates return value, greater values mean + // argument numbers. + if (Const->getSExtValue() == -1) + RetTy = CMeta->getType(); + else + ArgTypes[Const->getSExtValue()] = CMeta->getType(); + } + } + + return FunctionType::get(RetTy, ArgTypes, F.isVarArg()); +} + bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<ArrayRef<Register>> VRegs, @@ -71,7 +122,8 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, GR->setCurrentFunc(MIRBuilder.getMF()); // Assign types and names to all args, and store their types for later. 
- SmallVector<Register, 4> ArgTypeVRegs; + FunctionType *FTy = getOriginalFunctionType(F); + SmallVector<SPIRVType *, 4> ArgTypeVRegs; if (VRegs.size() > 0) { unsigned i = 0; for (const auto &Arg : F.args()) { @@ -79,9 +131,18 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. if (VRegs[i].size() > 1) return false; - auto *SpirvTy = - GR->assignTypeToVReg(Arg.getType(), VRegs[i][0], MIRBuilder); - ArgTypeVRegs.push_back(GR->getSPIRVTypeID(SpirvTy)); + Type *ArgTy = FTy->getParamType(i); + SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite; + MDNode *Node = F.getMetadata("kernel_arg_access_qual"); + if (Node && i < Node->getNumOperands()) { + StringRef AQString = cast<MDString>(Node->getOperand(i))->getString(); + if (AQString.compare("read_only") == 0) + AQ = SPIRV::AccessQualifier::ReadOnly; + else if (AQString.compare("write_only") == 0) + AQ = SPIRV::AccessQualifier::WriteOnly; + } + auto *SpirvTy = GR->assignTypeToVReg(ArgTy, VRegs[i][0], MIRBuilder, AQ); + ArgTypeVRegs.push_back(SpirvTy); if (Arg.hasName()) buildOpName(VRegs[i][0], Arg.getName(), MIRBuilder); @@ -92,8 +153,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SPIRV::Decoration::MaxByteOffset, {DerefBytes}); } if (Arg.hasAttribute(Attribute::Alignment)) { + auto Alignment = static_cast<unsigned>( + Arg.getAttribute(Attribute::Alignment).getValueAsInt()); buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Alignment, - {static_cast<unsigned>(Arg.getParamAlignment())}); + {Alignment}); } if (Arg.hasAttribute(Attribute::ReadOnly)) { auto Attr = @@ -107,6 +170,38 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::FuncParamAttr, {Attr}); } + if (Arg.hasAttribute(Attribute::NoAlias)) { + auto Attr = + static_cast<unsigned>(SPIRV::FunctionParameterAttribute::NoAlias); + 
buildOpDecorate(VRegs[i][0], MIRBuilder, + SPIRV::Decoration::FuncParamAttr, {Attr}); + } + Node = F.getMetadata("kernel_arg_type_qual"); + if (Node && i < Node->getNumOperands()) { + StringRef TypeQual = cast<MDString>(Node->getOperand(i))->getString(); + if (TypeQual.compare("volatile") == 0) + buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::Volatile, + {}); + } + Node = F.getMetadata("spirv.ParameterDecorations"); + if (Node && i < Node->getNumOperands() && + isa<MDNode>(Node->getOperand(i))) { + MDNode *MD = cast<MDNode>(Node->getOperand(i)); + for (const MDOperand &MDOp : MD->operands()) { + MDNode *MD2 = dyn_cast<MDNode>(MDOp); + assert(MD2 && "Metadata operand is expected"); + ConstantInt *Const = getConstInt(MD2, 0); + assert(Const && "MDOperand should be ConstantInt"); + auto Dec = static_cast<SPIRV::Decoration>(Const->getZExtValue()); + std::vector<uint32_t> DecVec; + for (unsigned j = 1; j < MD2->getNumOperands(); j++) { + ConstantInt *Const = getConstInt(MD2, j); + assert(Const && "MDOperand should be ConstantInt"); + DecVec.push_back(static_cast<uint32_t>(Const->getZExtValue())); + } + buildOpDecorate(VRegs[i][0], MIRBuilder, Dec, DecVec); + } + } ++i; } } @@ -117,30 +212,30 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, MRI->setRegClass(FuncVReg, &SPIRV::IDRegClass); if (F.isDeclaration()) GR->add(&F, &MIRBuilder.getMF(), FuncVReg); - - auto *FTy = F.getFunctionType(); - auto FuncTy = GR->assignTypeToVReg(FTy, FuncVReg, MIRBuilder); + SPIRVType *RetTy = GR->getOrCreateSPIRVType(FTy->getReturnType(), MIRBuilder); + SPIRVType *FuncTy = GR->getOrCreateOpTypeFunctionWithArgs( + FTy, RetTy, ArgTypeVRegs, MIRBuilder); // Build the OpTypeFunction declaring it. 
- Register ReturnTypeID = FuncTy->getOperand(1).getReg(); uint32_t FuncControl = getFunctionControl(F); MIRBuilder.buildInstr(SPIRV::OpFunction) .addDef(FuncVReg) - .addUse(ReturnTypeID) + .addUse(GR->getSPIRVTypeID(RetTy)) .addImm(FuncControl) .addUse(GR->getSPIRVTypeID(FuncTy)); // Add OpFunctionParameters. - const unsigned NumArgs = ArgTypeVRegs.size(); - for (unsigned i = 0; i < NumArgs; ++i) { + int i = 0; + for (const auto &Arg : F.args()) { assert(VRegs[i].size() == 1 && "Formal arg has multiple vregs"); MRI->setRegClass(VRegs[i][0], &SPIRV::IDRegClass); MIRBuilder.buildInstr(SPIRV::OpFunctionParameter) .addDef(VRegs[i][0]) - .addUse(ArgTypeVRegs[i]); + .addUse(GR->getSPIRVTypeID(ArgTypeVRegs[i])); if (F.isDeclaration()) - GR->add(F.getArg(i), &MIRBuilder.getMF(), VRegs[i][0]); + GR->add(&Arg, &MIRBuilder.getMF(), VRegs[i][0]); + i++; } // Name the function. if (F.hasName()) @@ -169,48 +264,51 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // TODO: handle the case of multiple registers. if (Info.OrigRet.Regs.size() > 1) return false; + MachineFunction &MF = MIRBuilder.getMF(); + GR->setCurrentFunc(MF); + FunctionType *FTy = nullptr; + const Function *CF = nullptr; - GR->setCurrentFunc(MIRBuilder.getMF()); - Register ResVReg = - Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0]; // Emit a regular OpFunctionCall. If it's an externally declared function, - // be sure to emit its type and function declaration here. It will be - // hoisted globally later. + // be sure to emit its type and function declaration here. It will be hoisted + // globally later. if (Info.Callee.isGlobal()) { - auto *CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal()); + CF = dyn_cast_or_null<const Function>(Info.Callee.getGlobal()); // TODO: support constexpr casts and indirect calls. 
if (CF == nullptr) return false; - if (CF->isDeclaration()) { - // Emit the type info and forward function declaration to the first MBB - // to ensure VReg definition dependencies are valid across all MBBs. - MachineBasicBlock::iterator OldII = MIRBuilder.getInsertPt(); - MachineBasicBlock &OldBB = MIRBuilder.getMBB(); - MachineBasicBlock &FirstBB = *MIRBuilder.getMF().getBlockNumbered(0); - MIRBuilder.setInsertPt(FirstBB, FirstBB.instr_end()); - - SmallVector<ArrayRef<Register>, 8> VRegArgs; - SmallVector<SmallVector<Register, 1>, 8> ToInsert; - for (const Argument &Arg : CF->args()) { - if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero()) - continue; // Don't handle zero sized types. - ToInsert.push_back({MIRBuilder.getMRI()->createGenericVirtualRegister( - LLT::scalar(32))}); - VRegArgs.push_back(ToInsert.back()); - } - // TODO: Reuse FunctionLoweringInfo. - FunctionLoweringInfo FuncInfo; - lowerFormalArguments(MIRBuilder, *CF, VRegArgs, FuncInfo); - MIRBuilder.setInsertPt(OldBB, OldII); + FTy = getOriginalFunctionType(*CF); + } + + Register ResVReg = + Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0]; + if (CF && CF->isDeclaration() && + !GR->find(CF, &MIRBuilder.getMF()).isValid()) { + // Emit the type info and forward function declaration to the first MBB + // to ensure VReg definition dependencies are valid across all MBBs. + MachineIRBuilder FirstBlockBuilder; + FirstBlockBuilder.setMF(MF); + FirstBlockBuilder.setMBB(*MF.getBlockNumbered(0)); + + SmallVector<ArrayRef<Register>, 8> VRegArgs; + SmallVector<SmallVector<Register, 1>, 8> ToInsert; + for (const Argument &Arg : CF->args()) { + if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero()) + continue; // Don't handle zero sized types. 
+ ToInsert.push_back( + {MIRBuilder.getMRI()->createGenericVirtualRegister(LLT::scalar(32))}); + VRegArgs.push_back(ToInsert.back()); } + // TODO: Reuse FunctionLoweringInfo + FunctionLoweringInfo FuncInfo; + lowerFormalArguments(FirstBlockBuilder, *CF, VRegArgs, FuncInfo); } // Make sure there's a valid return reg, even for functions returning void. - if (!ResVReg.isValid()) { + if (!ResVReg.isValid()) ResVReg = MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass); - } SPIRVType *RetType = - GR->assignTypeToVReg(Info.OrigRet.Ty, ResVReg, MIRBuilder); + GR->assignTypeToVReg(FTy->getReturnType(), ResVReg, MIRBuilder); // Emit the OpFunctionCall and its args. auto MIB = MIRBuilder.buildInstr(SPIRV::OpFunctionCall) @@ -224,6 +322,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; MIB.addUse(Arg.Regs[0]); } - return MIB.constrainAllUses(MIRBuilder.getTII(), *ST.getRegisterInfo(), - *ST.getRegBankInfo()); + const auto &STI = MF.getSubtarget(); + return MIB.constrainAllUses(MIRBuilder.getTII(), *STI.getRegisterInfo(), + *STI.getRegBankInfo()); } diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.h index c179bb35154b..c2d6ad82d507 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVCallLowering.h @@ -13,23 +13,21 @@ #ifndef LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H #define LLVM_LIB_TARGET_SPIRV_SPIRVCALLLOWERING_H +#include "SPIRVGlobalRegistry.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" namespace llvm { class SPIRVGlobalRegistry; -class SPIRVSubtarget; class SPIRVTargetLowering; class SPIRVCallLowering : public CallLowering { private: - const SPIRVSubtarget &ST; // Used to create and assign function, argument, and return type information. 
SPIRVGlobalRegistry *GR; public: - SPIRVCallLowering(const SPIRVTargetLowering &TLI, const SPIRVSubtarget &ST, - SPIRVGlobalRegistry *GR); + SPIRVCallLowering(const SPIRVTargetLowering &TLI, SPIRVGlobalRegistry *GR); // Built OpReturn or OpReturnValue. bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val, diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp index 57cd4bafd351..1926977ea66e 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp @@ -92,4 +92,4 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph( } } } -}
\ No newline at end of file +} diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h index 58ae1f86ce42..ab22c3d2a647 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.h @@ -169,6 +169,8 @@ public: Register find(const Argument *Arg, const MachineFunction *MF) { return AT.find(const_cast<Argument *>(Arg), MF); } + + const SPIRVDuplicatesTracker<Type> *getTypes() { return &TT; } }; } // namespace llvm -#endif
\ No newline at end of file +#endif // LLVM_LIB_TARGET_SPIRV_SPIRVDUPLICATESTRACKER_H diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 9624482e3622..0075f547b6d6 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -87,6 +87,7 @@ public: Instruction *visitLoadInst(LoadInst &I); Instruction *visitStoreInst(StoreInst &I); Instruction *visitAllocaInst(AllocaInst &I); + Instruction *visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); bool runOnFunction(Function &F) override; }; } // namespace @@ -103,7 +104,7 @@ static inline bool isAssignTypeInstr(const Instruction *I) { static bool isMemInstrToReplace(Instruction *I) { return isa<StoreInst>(I) || isa<LoadInst>(I) || isa<InsertValueInst>(I) || - isa<ExtractValueInst>(I); + isa<ExtractValueInst>(I) || isa<AtomicCmpXchgInst>(I); } static bool isAggrToReplace(const Value *V) { @@ -134,13 +135,14 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old, Instruction *New) { while (!Old->user_empty()) { auto *U = Old->user_back(); - if (isMemInstrToReplace(U) || isa<ReturnInst>(U)) { - U->replaceUsesOfWith(Old, New); - } else if (isAssignTypeInstr(U)) { + if (isAssignTypeInstr(U)) { IRB->SetInsertPoint(U); SmallVector<Value *, 2> Args = {New, U->getOperand(1)}; IRB->CreateIntrinsic(Intrinsic::spv_assign_type, {New->getType()}, Args); U->eraseFromParent(); + } else if (isMemInstrToReplace(U) || isa<ReturnInst>(U) || + isa<CallInst>(U)) { + U->replaceUsesOfWith(Old, New); } else { llvm_unreachable("illegal aggregate intrinsic user"); } @@ -301,10 +303,10 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) { MachineMemOperand::Flags Flags = TLI->getStoreMemOperandFlags(I, F->getParent()->getDataLayout()); auto *PtrOp = I.getPointerOperand(); - auto *NewI = - IRB->CreateIntrinsic(Intrinsic::spv_store, 
{PtrOp->getType()}, - {I.getValueOperand(), PtrOp, IRB->getInt16(Flags), - IRB->getInt8(I.getAlign().value())}); + auto *NewI = IRB->CreateIntrinsic( + Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()}, + {I.getValueOperand(), PtrOp, IRB->getInt16(Flags), + IRB->getInt8(I.getAlign().value())}); I.eraseFromParent(); return NewI; } @@ -314,6 +316,22 @@ Instruction *SPIRVEmitIntrinsics::visitAllocaInst(AllocaInst &I) { return &I; } +Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + assert(I.getType()->isAggregateType() && "Aggregate result is expected"); + SmallVector<Value *> Args; + for (auto &Op : I.operands()) + Args.push_back(Op); + Args.push_back(IRB->getInt32(I.getSyncScopeID())); + Args.push_back(IRB->getInt32( + static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering())))); + Args.push_back(IRB->getInt32( + static_cast<uint32_t>(getMemSemantics(I.getFailureOrdering())))); + auto *NewI = IRB->CreateIntrinsic(Intrinsic::spv_cmpxchg, + {I.getPointerOperand()->getType()}, {Args}); + replaceMemInstrUses(&I, NewI); + return NewI; +} + void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) { // Skip special artifical variable llvm.global.annotations. if (GV.getName() == "llvm.global.annotations") @@ -351,14 +369,13 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) { // Check GetElementPtrConstantExpr case. (isa<ConstantExpr>(Op) && isa<GEPOperator>(Op))) { IRB->SetInsertPoint(I); - buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op); + if (isa<UndefValue>(Op) && Op->getType()->isAggregateType()) + buildIntrWithMD(Intrinsic::spv_assign_type, {IRB->getInt32Ty()}, Op, + UndefValue::get(IRB->getInt32Ty())); + else + buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op); } } - // StoreInst's operand type can be changed in the next stage so we need to - // store it in the set. 
- if (isa<StoreInst>(I) && - cast<StoreInst>(I)->getValueOperand()->getType()->isAggregateType()) - AggrStores.insert(I); } void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) { @@ -378,7 +395,7 @@ void SPIRVEmitIntrinsics::processInstrAfterVisit(Instruction *I) { if ((isa<ConstantAggregateZero>(Op) && Op->getType()->isVectorTy()) || isa<PHINode>(I) || isa<SwitchInst>(I)) TrackConstants = false; - if (isa<ConstantData>(Op) && TrackConstants) { + if ((isa<ConstantData>(Op) || isa<ConstantExpr>(Op)) && TrackConstants) { unsigned OpNo = Op.getOperandNo(); if (II && ((II->getIntrinsicID() == Intrinsic::spv_gep && OpNo == 0) || (II->paramHasAttr(OpNo, Attribute::ImmArg)))) @@ -405,8 +422,20 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { AggrConsts.clear(); AggrStores.clear(); - IRB->SetInsertPoint(&Func.getEntryBlock().front()); + // StoreInst's operand type can be changed during the next transformations, + // so we need to store it in the set. Also store already transformed types. 
+ for (auto &I : instructions(Func)) { + StoreInst *SI = dyn_cast<StoreInst>(&I); + if (!SI) + continue; + Type *ElTy = SI->getValueOperand()->getType(); + PointerType *PTy = cast<PointerType>(SI->getOperand(1)->getType()); + if (ElTy->isAggregateType() || ElTy->isVectorTy() || + !PTy->isOpaqueOrPointeeTypeMatches(ElTy)) + AggrStores.insert(&I); + } + IRB->SetInsertPoint(&Func.getEntryBlock().front()); for (auto &GV : Func.getParent()->globals()) processGlobalValue(GV); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 5f890c003cbc..5c8fa7adfbdf 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -24,6 +24,24 @@ using namespace llvm; SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) : PointerSize(PointerSize) {} +SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth, + Register VReg, + MachineInstr &I, + const SPIRVInstrInfo &TII) { + SPIRVType *SpirvType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); + assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF); + return SpirvType; +} + +SPIRVType *SPIRVGlobalRegistry::assignVectTypeToVReg( + SPIRVType *BaseType, unsigned NumElements, Register VReg, MachineInstr &I, + const SPIRVInstrInfo &TII) { + SPIRVType *SpirvType = + getOrCreateSPIRVVectorType(BaseType, NumElements, I, TII); + assignSPIRVTypeToVReg(SpirvType, VReg, *CurMF); + return SpirvType; +} + SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg( const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccessQual, bool EmitIR) { @@ -96,6 +114,65 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeVector(uint32_t NumElems, return MIB; } +std::tuple<Register, ConstantInt *, bool> +SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType, + MachineIRBuilder *MIRBuilder, + MachineInstr *I, + const 
SPIRVInstrInfo *TII) { + const IntegerType *LLVMIntTy; + if (SpvType) + LLVMIntTy = cast<IntegerType>(getTypeForSPIRVType(SpvType)); + else + LLVMIntTy = IntegerType::getInt32Ty(CurMF->getFunction().getContext()); + bool NewInstr = false; + // Find a constant in DT or build a new one. + ConstantInt *CI = ConstantInt::get(const_cast<IntegerType *>(LLVMIntTy), Val); + Register Res = DT.find(CI, CurMF); + if (!Res.isValid()) { + unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; + LLT LLTy = LLT::scalar(32); + Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + if (MIRBuilder) + assignTypeToVReg(LLVMIntTy, Res, *MIRBuilder); + else + assignIntTypeToVReg(BitWidth, Res, *I, *TII); + DT.add(CI, CurMF, Res); + NewInstr = true; + } + return std::make_tuple(Res, CI, NewInstr); +} + +Register SPIRVGlobalRegistry::getOrCreateConstInt(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + assert(SpvType); + ConstantInt *CI; + Register Res; + bool New; + std::tie(Res, CI, New) = + getOrCreateConstIntReg(Val, SpvType, nullptr, &I, &TII); + // If we have found Res register which is defined by the passed G_CONSTANT + // machine instruction, a new constant instruction should be created. 
+ if (!New && (!I.getOperand(0).isReg() || Res != I.getOperand(0).getReg())) + return Res; + MachineInstrBuilder MIB; + MachineBasicBlock &BB = *I.getParent(); + if (Val) { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + addNumImm(APInt(getScalarOrVectorBitWidth(SpvType), Val), MIB); + } else { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &ST = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(), + *ST.getRegisterInfo(), *ST.getRegBankInfo()); + return Res; +} + Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType, @@ -112,14 +189,32 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val, Register Res = DT.find(ConstInt, &MF); if (!Res.isValid()) { unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; - Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); - assignTypeToVReg(LLVMIntTy, Res, MIRBuilder); - if (EmitIR) + LLT LLTy = LLT::scalar(EmitIR ? 
BitWidth : 32); + Res = MF.getRegInfo().createGenericVirtualRegister(LLTy); + assignTypeToVReg(LLVMIntTy, Res, MIRBuilder, + SPIRV::AccessQualifier::ReadWrite, EmitIR); + DT.add(ConstInt, &MIRBuilder.getMF(), Res); + if (EmitIR) { MIRBuilder.buildConstant(Res, *ConstInt); - else - MIRBuilder.buildInstr(SPIRV::OpConstantI) - .addDef(Res) - .addImm(ConstInt->getSExtValue()); + } else { + MachineInstrBuilder MIB; + if (Val) { + assert(SpvType); + MIB = MIRBuilder.buildInstr(SPIRV::OpConstantI) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + addNumImm(APInt(BitWidth, Val), MIB); + } else { + assert(SpvType); + MIB = MIRBuilder.buildInstr(SPIRV::OpConstantNull) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &Subtarget = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(), + *Subtarget.getRegisterInfo(), + *Subtarget.getRegBankInfo()); + } } return Res; } @@ -142,11 +237,63 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val, unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32; Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth)); assignTypeToVReg(LLVMFPTy, Res, MIRBuilder); + DT.add(ConstFP, &MF, Res); MIRBuilder.buildFConstant(Res, *ConstFP); } return Res; } +Register +SPIRVGlobalRegistry::getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + const Type *LLVMTy = getTypeForSPIRVType(SpvType); + assert(LLVMTy->isVectorTy()); + const FixedVectorType *LLVMVecTy = cast<FixedVectorType>(LLVMTy); + Type *LLVMBaseTy = LLVMVecTy->getElementType(); + // Find a constant vector in DT or build a new one. 
+ const auto ConstInt = ConstantInt::get(LLVMBaseTy, Val); + auto ConstVec = + ConstantVector::getSplat(LLVMVecTy->getElementCount(), ConstInt); + Register Res = DT.find(ConstVec, CurMF); + if (!Res.isValid()) { + unsigned BitWidth = getScalarOrVectorBitWidth(SpvType); + SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); + // SpvScalConst should be created before SpvVecConst to avoid undefined ID + // error on validation. + // TODO: can moved below once sorting of types/consts/defs is implemented. + Register SpvScalConst; + if (Val) + SpvScalConst = getOrCreateConstInt(Val, I, SpvBaseType, TII); + // TODO: maybe use bitwidth of base type. + LLT LLTy = LLT::scalar(32); + Register SpvVecConst = + CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + const unsigned ElemCnt = SpvType->getOperand(2).getImm(); + assignVectTypeToVReg(SpvBaseType, ElemCnt, SpvVecConst, I, TII); + DT.add(ConstVec, CurMF, SpvVecConst); + MachineInstrBuilder MIB; + MachineBasicBlock &BB = *I.getParent(); + if (Val) { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantComposite)) + .addDef(SpvVecConst) + .addUse(getSPIRVTypeID(SpvType)); + for (unsigned i = 0; i < ElemCnt; ++i) + MIB.addUse(SpvScalConst); + } else { + MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(SpvVecConst) + .addUse(getSPIRVTypeID(SpvType)); + } + const auto &Subtarget = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *Subtarget.getInstrInfo(), + *Subtarget.getRegisterInfo(), + *Subtarget.getRegBankInfo()); + return SpvVecConst; + } + return Res; +} + Register SPIRVGlobalRegistry::buildGlobalVariable( Register ResVReg, SPIRVType *BaseType, StringRef Name, const GlobalValue *GV, SPIRV::StorageClass Storage, @@ -169,7 +316,13 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( } GV = GVar; } - Register Reg; + Register Reg = DT.find(GVar, &MIRBuilder.getMF()); + if (Reg.isValid()) { + if (Reg != ResVReg) + MIRBuilder.buildCopy(ResVReg, 
Reg); + return ResVReg; + } + auto MIB = MIRBuilder.buildInstr(SPIRV::OpVariable) .addDef(ResVReg) .addUse(getSPIRVTypeID(BaseType)) @@ -234,14 +387,76 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems, return MIB; } +SPIRVType *SPIRVGlobalRegistry::getOpTypeOpaque(const StructType *Ty, + MachineIRBuilder &MIRBuilder) { + assert(Ty->hasName()); + const StringRef Name = Ty->hasName() ? Ty->getName() : ""; + Register ResVReg = createTypeVReg(MIRBuilder); + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeOpaque).addDef(ResVReg); + addStringImm(Name, MIB); + buildOpName(ResVReg, Name, MIRBuilder); + return MIB; +} + +SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(const StructType *Ty, + MachineIRBuilder &MIRBuilder, + bool EmitIR) { + SmallVector<Register, 4> FieldTypes; + for (const auto &Elem : Ty->elements()) { + SPIRVType *ElemTy = findSPIRVType(Elem, MIRBuilder); + assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid && + "Invalid struct element type"); + FieldTypes.push_back(getSPIRVTypeID(ElemTy)); + } + Register ResVReg = createTypeVReg(MIRBuilder); + auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg); + for (const auto &Ty : FieldTypes) + MIB.addUse(Ty); + if (Ty->hasName()) + buildOpName(ResVReg, Ty->getName(), MIRBuilder); + if (Ty->isPacked()) + buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {}); + return MIB; +} + +static bool isOpenCLBuiltinType(const StructType *SType) { + return SType->isOpaque() && SType->hasName() && + SType->getName().startswith("opencl."); +} + +static bool isSPIRVBuiltinType(const StructType *SType) { + return SType->isOpaque() && SType->hasName() && + SType->getName().startswith("spirv."); +} + +static bool isSpecialType(const Type *Ty) { + if (auto PType = dyn_cast<PointerType>(Ty)) { + if (!PType->isOpaque()) + Ty = PType->getNonOpaquePointerElementType(); + } + if (auto SType = dyn_cast<StructType>(Ty)) + return isOpenCLBuiltinType(SType) || isSPIRVBuiltinType(SType); + 
return false; +} + SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType, - MachineIRBuilder &MIRBuilder) { - auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypePointer) - .addDef(createTypeVReg(MIRBuilder)) - .addImm(static_cast<uint32_t>(SC)) - .addUse(getSPIRVTypeID(ElemType)); - return MIB; + MachineIRBuilder &MIRBuilder, + Register Reg) { + if (!Reg.isValid()) + Reg = createTypeVReg(MIRBuilder); + return MIRBuilder.buildInstr(SPIRV::OpTypePointer) + .addDef(Reg) + .addImm(static_cast<uint32_t>(SC)) + .addUse(getSPIRVTypeID(ElemType)); +} + +SPIRVType * +SPIRVGlobalRegistry::getOpTypeForwardPointer(SPIRV::StorageClass SC, + MachineIRBuilder &MIRBuilder) { + return MIRBuilder.buildInstr(SPIRV::OpTypeForwardPointer) + .addUse(createTypeVReg(MIRBuilder)) + .addImm(static_cast<uint32_t>(SC)); } SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction( @@ -255,10 +470,49 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeFunction( return MIB; } +SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeFunctionWithArgs( + const Type *Ty, SPIRVType *RetType, + const SmallVectorImpl<SPIRVType *> &ArgTypes, + MachineIRBuilder &MIRBuilder) { + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + SPIRVType *SpirvType = getOpTypeFunction(RetType, ArgTypes, MIRBuilder); + return finishCreatingSPIRVType(Ty, SpirvType); +} + +SPIRVType *SPIRVGlobalRegistry::findSPIRVType(const Type *Ty, + MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccQual, + bool EmitIR) { + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + if (ForwardPointerTypes.find(Ty) != ForwardPointerTypes.end()) + return ForwardPointerTypes[Ty]; + return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR); +} + +Register SPIRVGlobalRegistry::getSPIRVTypeID(const SPIRVType *SpirvType) const { + assert(SpirvType && "Attempting to get type id for nullptr type."); + if 
(SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer) + return SpirvType->uses().begin()->getReg(); + return SpirvType->defs().begin()->getReg(); +} + SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccQual, bool EmitIR) { + assert(!isSpecialType(Ty)); + auto &TypeToSPIRVTypeMap = DT.getTypes()->getAllUses(); + auto t = TypeToSPIRVTypeMap.find(Ty); + if (t != TypeToSPIRVTypeMap.end()) { + auto tt = t->second.find(&MIRBuilder.getMF()); + if (tt != t->second.end()) + return getSPIRVTypeForVReg(tt->second); + } + if (auto IType = dyn_cast<IntegerType>(Ty)) { const unsigned Width = IType->getBitWidth(); return Width == 1 ? getOpTypeBool(MIRBuilder) @@ -269,21 +523,25 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, if (Ty->isVoidTy()) return getOpTypeVoid(MIRBuilder); if (Ty->isVectorTy()) { - auto El = getOrCreateSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), - MIRBuilder); + SPIRVType *El = + findSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), MIRBuilder); return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El, MIRBuilder); } if (Ty->isArrayTy()) { - auto *El = getOrCreateSPIRVType(Ty->getArrayElementType(), MIRBuilder); + SPIRVType *El = findSPIRVType(Ty->getArrayElementType(), MIRBuilder); return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR); } - assert(!isa<StructType>(Ty) && "Unsupported StructType"); + if (auto SType = dyn_cast<StructType>(Ty)) { + if (SType->isOpaque()) + return getOpTypeOpaque(SType, MIRBuilder); + return getOpTypeStruct(SType, MIRBuilder, EmitIR); + } if (auto FType = dyn_cast<FunctionType>(Ty)) { - SPIRVType *RetTy = getOrCreateSPIRVType(FType->getReturnType(), MIRBuilder); + SPIRVType *RetTy = findSPIRVType(FType->getReturnType(), MIRBuilder); SmallVector<SPIRVType *, 4> ParamTypes; for (const auto &t : FType->params()) { - ParamTypes.push_back(getOrCreateSPIRVType(t, MIRBuilder)); + 
ParamTypes.push_back(findSPIRVType(t, MIRBuilder)); } return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder); } @@ -292,24 +550,51 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(const Type *Ty, // At the moment, all opaque pointers correspond to i8 element type. // TODO: change the implementation once opaque pointers are supported // in the SPIR-V specification. - if (PType->isOpaque()) { + if (PType->isOpaque()) SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder); - } else { - Type *ElemType = PType->getNonOpaquePointerElementType(); - // TODO: support OpenCL and SPIRV builtins like image2d_t that are passed - // as pointers, but should be treated as custom types like OpTypeImage. - assert(!isa<StructType>(ElemType) && "Unsupported StructType pointer"); - - // Otherwise, treat it as a regular pointer type. - SpvElementType = getOrCreateSPIRVType( - ElemType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR); - } + else + SpvElementType = + findSPIRVType(PType->getNonOpaquePointerElementType(), MIRBuilder, + SPIRV::AccessQualifier::ReadWrite, EmitIR); auto SC = addressSpaceToStorageClass(PType->getAddressSpace()); - return getOpTypePointer(SC, SpvElementType, MIRBuilder); + // Null pointer means we have a loop in type definitions, make and + // return corresponding OpTypeForwardPointer. + if (SpvElementType == nullptr) { + if (ForwardPointerTypes.find(Ty) == ForwardPointerTypes.end()) + ForwardPointerTypes[PType] = getOpTypeForwardPointer(SC, MIRBuilder); + return ForwardPointerTypes[PType]; + } + Register Reg(0); + // If we have forward pointer associated with this type, use its register + // operand to create OpTypePointer. 
+ if (ForwardPointerTypes.find(PType) != ForwardPointerTypes.end()) + Reg = getSPIRVTypeID(ForwardPointerTypes[PType]); + + return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg); } llvm_unreachable("Unable to convert LLVM type to SPIRVType"); } +SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType( + const Type *Ty, MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccessQual, bool EmitIR) { + if (TypesInProcessing.count(Ty) && !Ty->isPointerTy()) + return nullptr; + TypesInProcessing.insert(Ty); + SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR); + TypesInProcessing.erase(Ty); + VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType; + SPIRVToLLVMType[SpirvType] = Ty; + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); + // Do not add OpTypeForwardPointer to DT, a corresponding normal pointer type + // will be added later. For special types it is already added to DT. + if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() && + !isSpecialType(Ty)) + DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); + + return SpirvType; +} + SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const { auto t = VRegToTypeMap.find(CurMF); if (t != VRegToTypeMap.end()) { @@ -321,13 +606,26 @@ SPIRVType *SPIRVGlobalRegistry::getSPIRVTypeForVReg(Register VReg) const { } SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( - const Type *Type, MachineIRBuilder &MIRBuilder, + const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AccessQual, bool EmitIR) { - Register Reg = DT.find(Type, &MIRBuilder.getMF()); + Register Reg = DT.find(Ty, &MIRBuilder.getMF()); if (Reg.isValid()) return getSPIRVTypeForVReg(Reg); - SPIRVType *SpirvType = createSPIRVType(Type, MIRBuilder, AccessQual, EmitIR); - return restOfCreateSPIRVType(Type, SpirvType); + TypesInProcessing.clear(); + SPIRVType *STy = restOfCreateSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR); + // Create normal pointer types 
for the corresponding OpTypeForwardPointers. + for (auto &CU : ForwardPointerTypes) { + const Type *Ty2 = CU.first; + SPIRVType *STy2 = CU.second; + if ((Reg = DT.find(Ty2, &MIRBuilder.getMF())).isValid()) + STy2 = getSPIRVTypeForVReg(Reg); + else + STy2 = restOfCreateSPIRVType(Ty2, MIRBuilder, AccessQual, EmitIR); + if (Ty == Ty2) + STy = STy2; + } + ForwardPointerTypes.clear(); + return STy; } bool SPIRVGlobalRegistry::isScalarOfType(Register VReg, @@ -393,8 +691,8 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth, MIRBuilder); } -SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(const Type *LLVMTy, - SPIRVType *SpirvType) { +SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy, + SPIRVType *SpirvType) { assert(CurMF == SpirvType->getMF()); VRegToTypeMap[CurMF][getSPIRVTypeID(SpirvType)] = SpirvType; SPIRVToLLVMType[SpirvType] = LLVMTy; @@ -413,7 +711,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType( .addDef(createTypeVReg(CurMF->getRegInfo())) .addImm(BitWidth) .addImm(0); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); } SPIRVType * @@ -423,6 +721,19 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder) { MIRBuilder); } +SPIRVType * +SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I, + const SPIRVInstrInfo &TII) { + Type *LLVMTy = IntegerType::get(CurMF->getFunction().getContext(), 1); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); + MachineBasicBlock &BB = *I.getParent(); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeBool)) + .addDef(createTypeVReg(CurMF->getRegInfo())); + return finishCreatingSPIRVType(LLVMTy, MIB); +} + SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType( SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder) { return getOrCreateSPIRVType( @@ -436,12 +747,15 @@ SPIRVType 
*SPIRVGlobalRegistry::getOrCreateSPIRVVectorType( const SPIRVInstrInfo &TII) { Type *LLVMTy = FixedVectorType::get( const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypeVector)) .addDef(createTypeVReg(CurMF->getRegInfo())) .addUse(getSPIRVTypeID(BaseType)) .addImm(NumElements); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); } SPIRVType * @@ -460,10 +774,39 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType( Type *LLVMTy = PointerType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)), storageClassToAddressSpace(SC)); + Register Reg = DT.find(LLVMTy, CurMF); + if (Reg.isValid()) + return getSPIRVTypeForVReg(Reg); MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpTypePointer)) .addDef(createTypeVReg(CurMF->getRegInfo())) .addImm(static_cast<uint32_t>(SC)) .addUse(getSPIRVTypeID(BaseType)); - return restOfCreateSPIRVType(LLVMTy, MIB); + return finishCreatingSPIRVType(LLVMTy, MIB); +} + +Register SPIRVGlobalRegistry::getOrCreateUndef(MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII) { + assert(SpvType); + const Type *LLVMTy = getTypeForSPIRVType(SpvType); + assert(LLVMTy); + // Find a constant in DT or build a new one. 
+ UndefValue *UV = UndefValue::get(const_cast<Type *>(LLVMTy)); + Register Res = DT.find(UV, CurMF); + if (Res.isValid()) + return Res; + LLT LLTy = LLT::scalar(32); + Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); + assignSPIRVTypeToVReg(SpvType, Res, *CurMF); + DT.add(UV, CurMF, Res); + + MachineInstrBuilder MIB; + MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpUndef)) + .addDef(Res) + .addUse(getSPIRVTypeID(SpvType)); + const auto &ST = CurMF->getSubtarget(); + constrainSelectedInstRegOperands(*MIB, *ST.getInstrInfo(), + *ST.getRegisterInfo(), *ST.getRegBankInfo()); + return Res; } diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 13dcc20a3e0a..59ac2712a02f 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -30,7 +30,7 @@ class SPIRVGlobalRegistry { // Do not confuse this with DuplicatesTracker as DT maps Type* to <MF, Reg> // where Reg = OpType... // while VRegToTypeMap tracks SPIR-V type assigned to other regs (i.e. not - // type-declaring ones) + // type-declaring ones). DenseMap<const MachineFunction *, DenseMap<Register, SPIRVType *>> VRegToTypeMap; @@ -38,6 +38,9 @@ class SPIRVGlobalRegistry { DenseMap<SPIRVType *, const Type *> SPIRVToLLVMType; + SmallPtrSet<const Type *, 4> TypesInProcessing; + DenseMap<const Type *, SPIRVType *> ForwardPointerTypes; + // Number of bits pointers and size_t integers require. 
const unsigned PointerSize; @@ -46,6 +49,14 @@ class SPIRVGlobalRegistry { createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite, bool EmitIR = true); + SPIRVType *findSPIRVType( + const Type *Ty, MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier accessQual = SPIRV::AccessQualifier::ReadWrite, + bool EmitIR = true); + SPIRVType *restOfCreateSPIRVType(const Type *Type, + MachineIRBuilder &MIRBuilder, + SPIRV::AccessQualifier AccessQual, + bool EmitIR); public: SPIRVGlobalRegistry(unsigned PointerSize); @@ -91,6 +102,11 @@ public: const Type *Type, Register VReg, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier AQ = SPIRV::AccessQualifier::ReadWrite, bool EmitIR = true); + SPIRVType *assignIntTypeToVReg(unsigned BitWidth, Register VReg, + MachineInstr &I, const SPIRVInstrInfo &TII); + SPIRVType *assignVectTypeToVReg(SPIRVType *BaseType, unsigned NumElements, + Register VReg, MachineInstr &I, + const SPIRVInstrInfo &TII); // In cases where the SPIR-V type is already known, this function can be // used to map it to the given VReg via an ASSIGN_TYPE instruction. @@ -123,10 +139,7 @@ public: } // Return the VReg holding the result of the given OpTypeXXX instruction. 
- Register getSPIRVTypeID(const SPIRVType *SpirvType) const { - assert(SpirvType && "Attempting to get type id for nullptr type."); - return SpirvType->defs().begin()->getReg(); - } + Register getSPIRVTypeID(const SPIRVType *SpirvType) const; void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; } @@ -167,19 +180,38 @@ private: SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType, MachineIRBuilder &MIRBuilder, bool EmitIR = true); + SPIRVType *getOpTypeOpaque(const StructType *Ty, + MachineIRBuilder &MIRBuilder); + + SPIRVType *getOpTypeStruct(const StructType *Ty, MachineIRBuilder &MIRBuilder, + bool EmitIR = true); + SPIRVType *getOpTypePointer(SPIRV::StorageClass SC, SPIRVType *ElemType, - MachineIRBuilder &MIRBuilder); + MachineIRBuilder &MIRBuilder, Register Reg); + + SPIRVType *getOpTypeForwardPointer(SPIRV::StorageClass SC, + MachineIRBuilder &MIRBuilder); SPIRVType *getOpTypeFunction(SPIRVType *RetType, const SmallVectorImpl<SPIRVType *> &ArgTypes, MachineIRBuilder &MIRBuilder); - SPIRVType *restOfCreateSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); + std::tuple<Register, ConstantInt *, bool> getOrCreateConstIntReg( + uint64_t Val, SPIRVType *SpvType, MachineIRBuilder *MIRBuilder, + MachineInstr *I = nullptr, const SPIRVInstrInfo *TII = nullptr); + SPIRVType *finishCreatingSPIRVType(const Type *LLVMTy, SPIRVType *SpirvType); public: Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr, bool EmitIR = true); + Register getOrCreateConstInt(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, const SPIRVInstrInfo &TII); Register buildConstantFP(APFloat Val, MachineIRBuilder &MIRBuilder, SPIRVType *SpvType = nullptr); + Register getOrCreateConsIntVector(uint64_t Val, MachineInstr &I, + SPIRVType *SpvType, + const SPIRVInstrInfo &TII); + Register getOrCreateUndef(MachineInstr &I, SPIRVType *SpvType, + const SPIRVInstrInfo &TII); Register buildGlobalVariable(Register Reg, SPIRVType *BaseType, 
StringRef Name, const GlobalValue *GV, SPIRV::StorageClass Storage, @@ -193,19 +225,24 @@ public: SPIRVType *getOrCreateSPIRVIntegerType(unsigned BitWidth, MachineInstr &I, const SPIRVInstrInfo &TII); SPIRVType *getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder); + SPIRVType *getOrCreateSPIRVBoolType(MachineInstr &I, + const SPIRVInstrInfo &TII); SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType, unsigned NumElements, MachineIRBuilder &MIRBuilder); SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType, unsigned NumElements, MachineInstr &I, const SPIRVInstrInfo &TII); - SPIRVType *getOrCreateSPIRVPointerType( SPIRVType *BaseType, MachineIRBuilder &MIRBuilder, SPIRV::StorageClass SClass = SPIRV::StorageClass::Function); SPIRVType *getOrCreateSPIRVPointerType( SPIRVType *BaseType, MachineInstr &I, const SPIRVInstrInfo &TII, SPIRV::StorageClass SClass = SPIRV::StorageClass::Function); + SPIRVType *getOrCreateOpTypeFunctionWithArgs( + const Type *Ty, SPIRVType *RetType, + const SmallVectorImpl<SPIRVType *> &ArgTypes, + MachineIRBuilder &MIRBuilder); }; } // end namespace llvm #endif // LLLVM_LIB_TARGET_SPIRV_SPIRVTYPEMANAGER_H diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp index 754906308114..66d8b17b4296 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp @@ -52,7 +52,7 @@ bool SPIRVInstrInfo::isTypeDeclInstr(const MachineInstr &MI) const { auto DefRegClass = MRI.getRegClassOrNull(MI.getOperand(0).getReg()); return DefRegClass && DefRegClass->getID() == SPIRV::TYPERegClass.getID(); } else { - return false; + return MI.getOpcode() == SPIRV::OpTypeForwardPointer; } } @@ -193,3 +193,15 @@ void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, auto &MRI = I->getMF()->getRegInfo(); MRI.replaceRegWith(DstOp.getReg(), SrcOp.getReg()); } + +bool 
SPIRVInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + if (MI.getOpcode() == SPIRV::GET_ID || MI.getOpcode() == SPIRV::GET_fID || + MI.getOpcode() == SPIRV::GET_pID || MI.getOpcode() == SPIRV::GET_vfID || + MI.getOpcode() == SPIRV::GET_vID) { + auto &MRI = MI.getMF()->getRegInfo(); + MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); + MI.eraseFromParent(); + return true; + } + return false; +} diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h index 2600d9cfca2e..334351c8eeae 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.h @@ -48,6 +48,7 @@ public: void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; + bool expandPostRAPseudo(MachineInstr &MI) const override; }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index d6fec5fd0785..d1c20795f804 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -449,6 +449,7 @@ def OpCopyLogical: UnOp<"OpCopyLogical", 400>; def OpSNegate: UnOp<"OpSNegate", 126>; def OpFNegate: UnOpTyped<"OpFNegate", 127, fID, fneg>; +def OpFNegateV: UnOpTyped<"OpFNegate", 127, vfID, fneg>; defm OpIAdd: BinOpTypedGen<"OpIAdd", 128, add, 0, 1>; defm OpFAdd: BinOpTypedGen<"OpFAdd", 129, fadd, 1, 1>; @@ -618,8 +619,10 @@ def OpAtomicCompareExchange: Op<230, (outs ID:$res), (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq, ID:$neq, ID:$val, ID:$cmp), "$res = OpAtomicCompareExchange $ty $ptr $sc $eq $neq $val $cmp">; -// TODO Currently the following deprecated opcode is missing: -// OpAtomicCompareExchangeWeak +def OpAtomicCompareExchangeWeak: Op<231, (outs 
ID:$res), + (ins TYPE:$ty, ID:$ptr, ID:$sc, ID:$eq, + ID:$neq, ID:$val, ID:$cmp), + "$res = OpAtomicCompareExchangeWeak $ty $ptr $sc $eq $neq $val $cmp">; def OpAtomicIIncrement: AtomicOp<"OpAtomicIIncrement", 232>; def OpAtomicIDecrement: AtomicOp<"OpAtomicIDecrement", 233>; @@ -660,6 +663,11 @@ def OpMemoryNamedBarrier: Op<329, (outs), (ins ID:$barr, ID:$mem, ID:$sem), // 3.42.21. Group and Subgroup Instructions +def OpGroupAsyncCopy: Op<259, (outs ID:$res), (ins TYPE:$ty, ID:$scope, + ID:$dst, ID:$src, ID:$nelts, ID:$stride, ID:$event), + "$res = OpGroupAsyncCopy $ty $scope $dst $src $nelts $stride $event">; +def OpGroupWaitEvents: Op<260, (outs), (ins ID:$scope, ID:$nelts, ID:$elist), + "OpGroupWaitEvents $scope $nelts $elist">; def OpGroupAll: Op<261, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr), "$res = OpGroupAll $ty $scope $pr">; def OpGroupAny: Op<262, (outs ID:$res), (ins TYPE:$ty, ID:$scope, ID:$pr), @@ -680,6 +688,18 @@ def OpGroupUMax: OpGroup<"UMax", 270>; def OpGroupSMax: OpGroup<"SMax", 271>; // TODO: 3.42.22. Device-Side Enqueue Instructions +def OpRetainEvent: Op<297, (outs), (ins ID:$event), "OpRetainEvent $event">; +def OpReleaseEvent: Op<298, (outs), (ins ID:$event), "OpReleaseEvent $event">; +def OpCreateUserEvent: Op<299, (outs ID:$res), (ins TYPE:$type), + "$res = OpCreateUserEvent $type">; +def OpIsValidEvent: Op<300, (outs ID:$res), (ins TYPE:$type, ID:$event), + "$res = OpIsValidEvent $type $event ">; +def OpSetUserEventStatus: Op<301, (outs), (ins ID:$event, ID:$status), + "OpSetUserEventStatus $event $status">; +def OpCaptureEventProfilingInfo: Op<302, (outs), + (ins ID:$event, ID:$info, ID:$value), + "OpCaptureEventProfilingInfo $event $info $value">; + // TODO: 3.42.23. Pipe Instructions // 3.42.24. 
Non-Uniform Instructions diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 90b921a06f21..9365fd22e4e7 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -197,6 +197,8 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); } +static bool isImm(const MachineOperand &MO, MachineRegisterInfo *MRI); + // Defined in SPIRVLegalizerInfo.cpp. extern bool isTypeFoldingSupported(unsigned Opcode); @@ -335,6 +337,30 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast); case TargetOpcode::G_ADDRSPACE_CAST: return selectAddrSpaceCast(ResVReg, ResType, I); + case TargetOpcode::G_PTR_ADD: { + // Currently, we get G_PTR_ADD only as a result of translating + // global variables, initialized with constant expressions like GV + Const + // (see test opencl/basic/progvar_prog_scope_init.ll). + // TODO: extend the handler once we have other cases. 
+ assert(I.getOperand(1).isReg() && I.getOperand(2).isReg()); + Register GV = I.getOperand(1).getReg(); + MachineRegisterInfo::def_instr_iterator II = MRI->def_instr_begin(GV); + assert(((*II).getOpcode() == TargetOpcode::G_GLOBAL_VALUE || + (*II).getOpcode() == TargetOpcode::COPY || + (*II).getOpcode() == SPIRV::OpVariable) && + isImm(I.getOperand(2), MRI)); + Register Idx = buildZerosVal(GR.getOrCreateSPIRVIntegerType(32, I, TII), I); + MachineBasicBlock &BB = *I.getParent(); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>( + SPIRV::Opcode::InBoundsPtrAccessChain)) + .addUse(GV) + .addUse(Idx) + .addUse(I.getOperand(2).getReg()); + return MIB.constrainAllUses(TII, TRI, RBI); + } case TargetOpcode::G_ATOMICRMW_OR: return selectAtomicRMW(ResVReg, ResType, I, SPIRV::OpAtomicOr); @@ -387,23 +413,6 @@ bool SPIRVInstructionSelector::selectUnOp(Register ResVReg, Opcode); } -static SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) { - switch (Ord) { - case AtomicOrdering::Acquire: - return SPIRV::MemorySemantics::Acquire; - case AtomicOrdering::Release: - return SPIRV::MemorySemantics::Release; - case AtomicOrdering::AcquireRelease: - return SPIRV::MemorySemantics::AcquireRelease; - case AtomicOrdering::SequentiallyConsistent: - return SPIRV::MemorySemantics::SequentiallyConsistent; - case AtomicOrdering::Unordered: - case AtomicOrdering::Monotonic: - case AtomicOrdering::NotAtomic: - return SPIRV::MemorySemantics::None; - } -} - static SPIRV::Scope getScope(SyncScope::ID Ord) { switch (Ord) { case SyncScope::SingleThread: @@ -484,16 +493,15 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized)) - .addDef(I.getOperand(0).getReg()) + .addUse(I.getOperand(0).getReg()) 
.addUse(I.getOperand(1).getReg()) .addUse(I.getOperand(2).getReg()); if (I.getNumMemOperands()) addMemoryOperands(*I.memoperands_begin(), MIB); bool Result = MIB.constrainAllUses(TII, TRI, RBI); - if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) { + if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg()) BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), ResVReg) .addUse(MIB->getOperand(0).getReg()); - } return Result; } @@ -541,36 +549,71 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { - assert(I.hasOneMemOperand()); - const MachineMemOperand *MemOp = *I.memoperands_begin(); - uint32_t Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID())); - Register ScopeReg = buildI32Constant(Scope, I); - + Register ScopeReg; + Register MemSemEqReg; + Register MemSemNeqReg; Register Ptr = I.getOperand(2).getReg(); + if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) { + assert(I.hasOneMemOperand()); + const MachineMemOperand *MemOp = *I.memoperands_begin(); + unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID())); + ScopeReg = buildI32Constant(Scope, I); + + unsigned ScSem = static_cast<uint32_t>( + getMemSemanticsForStorageClass(GR.getPointerStorageClass(Ptr))); + AtomicOrdering AO = MemOp->getSuccessOrdering(); + unsigned MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem; + MemSemEqReg = buildI32Constant(MemSemEq, I); + AtomicOrdering FO = MemOp->getFailureOrdering(); + unsigned MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem; + MemSemNeqReg = + MemSemEq == MemSemNeq ? 
MemSemEqReg : buildI32Constant(MemSemNeq, I); + } else { + ScopeReg = I.getOperand(5).getReg(); + MemSemEqReg = I.getOperand(6).getReg(); + MemSemNeqReg = I.getOperand(7).getReg(); + } + Register Cmp = I.getOperand(3).getReg(); Register Val = I.getOperand(4).getReg(); - SPIRVType *SpvValTy = GR.getSPIRVTypeForVReg(Val); - SPIRV::StorageClass SC = GR.getPointerStorageClass(Ptr); - uint32_t ScSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC)); - AtomicOrdering AO = MemOp->getSuccessOrdering(); - uint32_t MemSemEq = static_cast<uint32_t>(getMemSemantics(AO)) | ScSem; - Register MemSemEqReg = buildI32Constant(MemSemEq, I); - AtomicOrdering FO = MemOp->getFailureOrdering(); - uint32_t MemSemNeq = static_cast<uint32_t>(getMemSemantics(FO)) | ScSem; - Register MemSemNeqReg = - MemSemEq == MemSemNeq ? MemSemEqReg : buildI32Constant(MemSemNeq, I); + Register ACmpRes = MRI->createVirtualRegister(&SPIRV::IDRegClass); const DebugLoc &DL = I.getDebugLoc(); - return BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(SpvValTy)) - .addUse(Ptr) - .addUse(ScopeReg) - .addUse(MemSemEqReg) - .addUse(MemSemNeqReg) - .addUse(Val) - .addUse(Cmp) - .constrainAllUses(TII, TRI, RBI); + bool Result = + BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpAtomicCompareExchange)) + .addDef(ACmpRes) + .addUse(GR.getSPIRVTypeID(SpvValTy)) + .addUse(Ptr) + .addUse(ScopeReg) + .addUse(MemSemEqReg) + .addUse(MemSemNeqReg) + .addUse(Val) + .addUse(Cmp) + .constrainAllUses(TII, TRI, RBI); + Register CmpSuccReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + SPIRVType *BoolTy = GR.getOrCreateSPIRVBoolType(I, TII); + Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpIEqual)) + .addDef(CmpSuccReg) + .addUse(GR.getSPIRVTypeID(BoolTy)) + .addUse(ACmpRes) + .addUse(Cmp) + .constrainAllUses(TII, TRI, RBI); + Register TmpReg = MRI->createVirtualRegister(&SPIRV::IDRegClass); + Result |= BuildMI(*I.getParent(), I, DL, 
TII.get(SPIRV::OpCompositeInsert)) + .addDef(TmpReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(ACmpRes) + .addUse(GR.getOrCreateUndef(I, ResType, TII)) + .addImm(0) + .constrainAllUses(TII, TRI, RBI); + Result |= BuildMI(*I.getParent(), I, DL, TII.get(SPIRV::OpCompositeInsert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(CmpSuccReg) + .addUse(TmpReg) + .addImm(1) + .constrainAllUses(TII, TRI, RBI); + return Result; } static bool isGenericCastablePtr(SPIRV::StorageClass SC) { @@ -592,6 +635,27 @@ static bool isGenericCastablePtr(SPIRV::StorageClass SC) { bool SPIRVInstructionSelector::selectAddrSpaceCast(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { + // If the AddrSpaceCast user is single and in OpConstantComposite or + // OpVariable, we should select OpSpecConstantOp. + auto UIs = MRI->use_instructions(ResVReg); + if (!UIs.empty() && ++UIs.begin() == UIs.end() && + (UIs.begin()->getOpcode() == SPIRV::OpConstantComposite || + UIs.begin()->getOpcode() == SPIRV::OpVariable || + isSpvIntrinsic(*UIs.begin(), Intrinsic::spv_init_global))) { + Register NewReg = I.getOperand(1).getReg(); + MachineBasicBlock &BB = *I.getParent(); + SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII); + ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII, + SPIRV::StorageClass::Generic); + bool Result = + BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSpecConstantOp)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addImm(static_cast<uint32_t>(SPIRV::Opcode::PtrCastToGeneric)) + .addUse(NewReg) + .constrainAllUses(TII, TRI, RBI); + return Result; + } Register SrcPtr = I.getOperand(1).getReg(); SPIRVType *SrcPtrTy = GR.getSPIRVTypeForVReg(SrcPtr); SPIRV::StorageClass SrcSC = GR.getPointerStorageClass(SrcPtr); @@ -842,7 +906,9 @@ bool SPIRVInstructionSelector::selectFCmp(Register ResVReg, Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType, MachineInstr &I) const { - return 
buildI32Constant(0, I, ResType); + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConsIntVector(0, I, ResType, TII); + return GR.getOrCreateConstInt(0, I, ResType, TII); } Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, @@ -851,20 +917,9 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes, unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType); APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth) : APInt::getOneBitSet(BitWidth, 0); - Register OneReg = buildI32Constant(One.getZExtValue(), I, ResType); - if (ResType->getOpcode() == SPIRV::OpTypeVector) { - const unsigned NumEles = ResType->getOperand(2).getImm(); - Register OneVec = MRI->createVirtualRegister(&SPIRV::IDRegClass); - unsigned Opcode = SPIRV::OpConstantComposite; - auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode)) - .addDef(OneVec) - .addUse(GR.getSPIRVTypeID(ResType)); - for (unsigned i = 0; i < NumEles; ++i) - MIB.addUse(OneReg); - constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); - return OneVec; - } - return OneReg; + if (ResType->getOpcode() == SPIRV::OpTypeVector) + return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII); + return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII); } bool SPIRVInstructionSelector::selectSelect(Register ResVReg, @@ -959,13 +1014,23 @@ bool SPIRVInstructionSelector::selectConst(Register ResVReg, const SPIRVType *ResType, const APInt &Imm, MachineInstr &I) const { - assert(ResType->getOpcode() != SPIRV::OpTypePointer || Imm.isNullValue()); + unsigned TyOpcode = ResType->getOpcode(); + assert(TyOpcode != SPIRV::OpTypePointer || Imm.isNullValue()); MachineBasicBlock &BB = *I.getParent(); - if (ResType->getOpcode() == SPIRV::OpTypePointer && Imm.isNullValue()) { + if ((TyOpcode == SPIRV::OpTypePointer || TyOpcode == SPIRV::OpTypeEvent) && + Imm.isNullValue()) return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) .addDef(ResVReg) 
.addUse(GR.getSPIRVTypeID(ResType)) .constrainAllUses(TII, TRI, RBI); + if (TyOpcode == SPIRV::OpTypeInt) { + Register Reg = GR.getOrCreateConstInt(Imm.getZExtValue(), I, ResType, TII); + if (Reg == ResVReg) + return true; + return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(Reg) + .constrainAllUses(TII, TRI, RBI); } auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantI)) .addDef(ResVReg) @@ -1006,29 +1071,29 @@ bool SPIRVInstructionSelector::selectInsertVal(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); - return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - // object to insert - .addUse(I.getOperand(3).getReg()) - // composite to insert into - .addUse(I.getOperand(2).getReg()) - // TODO: support arbitrary number of indices - .addImm(foldImm(I.getOperand(4), MRI)) - .constrainAllUses(TII, TRI, RBI); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeInsert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + // object to insert + .addUse(I.getOperand(3).getReg()) + // composite to insert into + .addUse(I.getOperand(2).getReg()); + for (unsigned i = 4; i < I.getNumOperands(); i++) + MIB.addImm(foldImm(I.getOperand(i), MRI)); + return MIB.constrainAllUses(TII, TRI, RBI); } bool SPIRVInstructionSelector::selectExtractVal(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { MachineBasicBlock &BB = *I.getParent(); - return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract)) - .addDef(ResVReg) - .addUse(GR.getSPIRVTypeID(ResType)) - .addUse(I.getOperand(2).getReg()) - // TODO: support arbitrary number of indices - .addImm(foldImm(I.getOperand(3), MRI)) - .constrainAllUses(TII, TRI, RBI); + auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract)) + .addDef(ResVReg) + 
.addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()); + for (unsigned i = 3; i < I.getNumOperands(); i++) + MIB.addImm(foldImm(I.getOperand(i), MRI)); + return MIB.constrainAllUses(TII, TRI, RBI); } bool SPIRVInstructionSelector::selectInsertElt(Register ResVReg, @@ -1154,6 +1219,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, } return MIB.constrainAllUses(TII, TRI, RBI); } break; + case Intrinsic::spv_cmpxchg: + return selectAtomicCmpXchg(ResVReg, ResType, I); + break; default: llvm_unreachable("Intrinsic selection not implemented"); } @@ -1239,8 +1307,32 @@ bool SPIRVInstructionSelector::selectGlobalValue( GV->getType(), MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false); std::string GlobalIdent = GV->getGlobalIdentifier(); - // TODO: suport @llvm.global.annotations. + // We have functions as operands in tests with blocks of instruction e.g. in + // transcoding/global_block.ll. These operands are not used and should be + // substituted by zero constants. Their type is expected to be always + // OpTypePointer Function %uchar. 
+ if (isa<Function>(GV)) { + const Constant *ConstVal = GV; + MachineBasicBlock &BB = *I.getParent(); + Register NewReg = GR.find(ConstVal, GR.CurMF); + if (!NewReg.isValid()) { + SPIRVType *SpvBaseTy = GR.getOrCreateSPIRVIntegerType(8, I, TII); + ResType = GR.getOrCreateSPIRVPointerType(SpvBaseTy, I, TII); + Register NewReg = ResVReg; + GR.add(ConstVal, GR.CurMF, NewReg); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull)) + .addDef(NewReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .constrainAllUses(TII, TRI, RBI); + } + assert(NewReg != ResVReg); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(TargetOpcode::COPY)) + .addDef(ResVReg) + .addUse(NewReg) + .constrainAllUses(TII, TRI, RBI); + } auto GlobalVar = cast<GlobalVariable>(GV); + assert(GlobalVar->getName() != "llvm.global.annotations"); bool HasInit = GlobalVar->hasInitializer() && !isa<UndefValue>(GlobalVar->getInitializer()); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp index 8e4ab973bf07..8aaac50c94d7 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVMCInstLower.cpp @@ -45,7 +45,12 @@ void SPIRVMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI, break; } case MachineOperand::MO_Immediate: - MCOp = MCOperand::createImm(MO.getImm()); + if (MI->getOpcode() == SPIRV::OpExtInst && i == 2) { + Register Reg = MAI->getExtInstSetReg(MO.getImm()); + MCOp = MCOperand::createReg(Reg); + } else { + MCOp = MCOperand::createImm(MO.getImm()); + } break; case MachineOperand::MO_FPImmediate: MCOp = MCOperand::createDFPImm( diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index a39df5234935..143ddf7297dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ 
b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -60,62 +60,50 @@ void SPIRVModuleAnalysis::setBaseInfo(const Module &M) { MAI.InstrsToDelete.clear(); MAI.FuncNameMap.clear(); MAI.GlobalVarList.clear(); + MAI.ExtInstSetMap.clear(); // TODO: determine memory model and source language from the configuratoin. - MAI.Mem = SPIRV::MemoryModel::OpenCL; - MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C; - unsigned PtrSize = ST->getPointerSize(); - MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32 - : PtrSize == 64 ? SPIRV::AddressingModel::Physical64 - : SPIRV::AddressingModel::Logical; + if (auto MemModel = M.getNamedMetadata("spirv.MemoryModel")) { + auto MemMD = MemModel->getOperand(0); + MAI.Addr = static_cast<SPIRV::AddressingModel>(getMetadataUInt(MemMD, 0)); + MAI.Mem = static_cast<SPIRV::MemoryModel>(getMetadataUInt(MemMD, 1)); + } else { + MAI.Mem = SPIRV::MemoryModel::OpenCL; + unsigned PtrSize = ST->getPointerSize(); + MAI.Addr = PtrSize == 32 ? SPIRV::AddressingModel::Physical32 + : PtrSize == 64 ? SPIRV::AddressingModel::Physical64 + : SPIRV::AddressingModel::Logical; + } // Get the OpenCL version number from metadata. // TODO: support other source languages. - MAI.SrcLangVersion = 0; if (auto VerNode = M.getNamedMetadata("opencl.ocl.version")) { - // Construct version literal according to OpenCL 2.2 environment spec. + MAI.SrcLang = SPIRV::SourceLanguage::OpenCL_C; + // Construct version literal in accordance with SPIRV-LLVM-Translator. + // TODO: support multiple OCL version metadata. 
+ assert(VerNode->getNumOperands() > 0 && "Invalid SPIR"); auto VersionMD = VerNode->getOperand(0); unsigned MajorNum = getMetadataUInt(VersionMD, 0, 2); unsigned MinorNum = getMetadataUInt(VersionMD, 1); unsigned RevNum = getMetadataUInt(VersionMD, 2); - MAI.SrcLangVersion = 0 | (MajorNum << 16) | (MinorNum << 8) | RevNum; + MAI.SrcLangVersion = (MajorNum * 100 + MinorNum) * 1000 + RevNum; + } else { + MAI.SrcLang = SPIRV::SourceLanguage::Unknown; + MAI.SrcLangVersion = 0; } -} -// True if there is an instruction in the MS list with all the same operands as -// the given instruction has (after the given starting index). -// TODO: maybe it needs to check Opcodes too. -static bool findSameInstrInMS(const MachineInstr &A, - SPIRV::ModuleSectionType MSType, - SPIRV::ModuleAnalysisInfo &MAI, - bool UpdateRegAliases, - unsigned StartOpIndex = 0) { - for (const auto *B : MAI.MS[MSType]) { - const unsigned NumAOps = A.getNumOperands(); - if (NumAOps == B->getNumOperands() && A.getNumDefs() == B->getNumDefs()) { - bool AllOpsMatch = true; - for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) { - if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) { - Register RegA = A.getOperand(i).getReg(); - Register RegB = B->getOperand(i).getReg(); - AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) == - MAI.getRegisterAlias(B->getMF(), RegB); - } else { - AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i)); - } - } - if (AllOpsMatch) { - if (UpdateRegAliases) { - assert(A.getOperand(0).isReg() && B->getOperand(0).isReg()); - Register LocalReg = A.getOperand(0).getReg(); - Register GlobalReg = - MAI.getRegisterAlias(B->getMF(), B->getOperand(0).getReg()); - MAI.setRegisterAlias(A.getMF(), LocalReg, GlobalReg); - } - return true; - } + if (auto ExtNode = M.getNamedMetadata("opencl.used.extensions")) { + for (unsigned I = 0, E = ExtNode->getNumOperands(); I != E; ++I) { + MDNode *MD = ExtNode->getOperand(I); + if (!MD || MD->getNumOperands() == 0) + 
continue; + for (unsigned J = 0, N = MD->getNumOperands(); J != N; ++J) + MAI.SrcExt.insert(cast<MDString>(MD->getOperand(J))->getString()); } } - return false; + + // TODO: check if it's required by default. + MAI.ExtInstSetMap[static_cast<unsigned>(SPIRV::InstructionSet::OpenCL_std)] = + Register::index2VirtReg(MAI.getNextID()); } // Collect MI which defines the register in the given machine function. @@ -135,7 +123,7 @@ void SPIRVModuleAnalysis::collectGlobalEntities( const std::vector<SPIRV::DTSortableEntry *> &DepsGraph, SPIRV::ModuleSectionType MSType, std::function<bool(const SPIRV::DTSortableEntry *)> Pred, - bool UsePreOrder) { + bool UsePreOrder = false) { DenseSet<const SPIRV::DTSortableEntry *> Visited; for (const auto *E : DepsGraph) { std::function<void(const SPIRV::DTSortableEntry *)> RecHoistUtil; @@ -188,13 +176,41 @@ void SPIRVModuleAnalysis::processDefInstrs(const Module &M) { collectGlobalEntities( DepsGraph, SPIRV::MB_TypeConstVars, - [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }, false); + [](const SPIRV::DTSortableEntry *E) { return !E->getIsFunc(); }); collectGlobalEntities( DepsGraph, SPIRV::MB_ExtFuncDecls, [](const SPIRV::DTSortableEntry *E) { return E->getIsFunc(); }, true); } +// True if there is an instruction in the MS list with all the same operands as +// the given instruction has (after the given starting index). +// TODO: maybe it needs to check Opcodes too. 
+static bool findSameInstrInMS(const MachineInstr &A, + SPIRV::ModuleSectionType MSType, + SPIRV::ModuleAnalysisInfo &MAI, + unsigned StartOpIndex = 0) { + for (const auto *B : MAI.MS[MSType]) { + const unsigned NumAOps = A.getNumOperands(); + if (NumAOps != B->getNumOperands() || A.getNumDefs() != B->getNumDefs()) + continue; + bool AllOpsMatch = true; + for (unsigned i = StartOpIndex; i < NumAOps && AllOpsMatch; ++i) { + if (A.getOperand(i).isReg() && B->getOperand(i).isReg()) { + Register RegA = A.getOperand(i).getReg(); + Register RegB = B->getOperand(i).getReg(); + AllOpsMatch = MAI.getRegisterAlias(A.getMF(), RegA) == + MAI.getRegisterAlias(B->getMF(), RegB); + } else { + AllOpsMatch = A.getOperand(i).isIdenticalTo(B->getOperand(i)); + } + } + if (AllOpsMatch) + return true; + } + return false; +} + // Look for IDs declared with Import linkage, and map the imported name string // to the register defining that variable (which will usually be the result of // an OpFunction). This lets us call externally imported functions using @@ -228,12 +244,16 @@ void SPIRVModuleAnalysis::collectFuncNames(MachineInstr &MI, // numbering has already occurred by this point. We can directly compare reg // arguments when detecting duplicates. static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, - SPIRV::ModuleSectionType MSType) { + SPIRV::ModuleSectionType MSType, + bool Append = true) { MAI.setSkipEmission(&MI); - if (findSameInstrInMS(MI, MSType, MAI, false)) + if (findSameInstrInMS(MI, MSType, MAI)) return; // Found a duplicate, so don't add it. // No duplicates, so add it. 
- MAI.MS[MSType].push_back(&MI); + if (Append) + MAI.MS[MSType].push_back(&MI); + else + MAI.MS[MSType].insert(MAI.MS[MSType].begin(), &MI); } // Some global instructions make reference to function-local ID regs, so cannot @@ -256,15 +276,22 @@ void SPIRVModuleAnalysis::processOtherInstrs(const Module &M) { } else if (TII->isDecorationInstr(MI)) { collectOtherInstr(MI, MAI, SPIRV::MB_Annotations); collectFuncNames(MI, *F); + } else if (TII->isConstantInstr(MI)) { + // Now OpSpecConstant*s are not in DT, + // but they need to be collected anyway. + collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars); } else if (OpCode == SPIRV::OpFunction) { collectFuncNames(MI, *F); + } else if (OpCode == SPIRV::OpTypeForwardPointer) { + collectOtherInstr(MI, MAI, SPIRV::MB_TypeConstVars, false); } } } } // Number registers in all functions globally from 0 onwards and store -// the result in global register alias table. +// the result in global register alias table. Some registers are already +// numbered in collectGlobalEntities. void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) { for (auto F = M.begin(), E = M.end(); F != E; ++F) { if ((*F).isDeclaration()) @@ -282,11 +309,50 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) { Register NewReg = Register::index2VirtReg(MAI.getNextID()); MAI.setRegisterAlias(MF, Reg, NewReg); } + if (MI.getOpcode() != SPIRV::OpExtInst) + continue; + auto Set = MI.getOperand(2).getImm(); + if (MAI.ExtInstSetMap.find(Set) == MAI.ExtInstSetMap.end()) + MAI.ExtInstSetMap[Set] = Register::index2VirtReg(MAI.getNextID()); } } } } +// Find OpIEqual and OpBranchConditional instructions originating from +// OpSwitches, mark them skipped for emission. Also mark MBB skipped if it +// contains only these instructions. 
+static void processSwitches(const Module &M, SPIRV::ModuleAnalysisInfo &MAI, + MachineModuleInfo *MMI) { + DenseSet<Register> SwitchRegs; + for (auto F = M.begin(), E = M.end(); F != E; ++F) { + MachineFunction *MF = MMI->getMachineFunction(*F); + if (!MF) + continue; + for (MachineBasicBlock &MBB : *MF) + for (MachineInstr &MI : MBB) { + if (MAI.getSkipEmission(&MI)) + continue; + if (MI.getOpcode() == SPIRV::OpSwitch) { + assert(MI.getOperand(0).isReg()); + SwitchRegs.insert(MI.getOperand(0).getReg()); + } + if (MI.getOpcode() != SPIRV::OpIEqual || !MI.getOperand(2).isReg() || + !SwitchRegs.contains(MI.getOperand(2).getReg())) + continue; + Register CmpReg = MI.getOperand(0).getReg(); + MachineInstr *CBr = MI.getNextNode(); + assert(CBr && CBr->getOpcode() == SPIRV::OpBranchConditional && + CBr->getOperand(0).isReg() && + CBr->getOperand(0).getReg() == CmpReg); + MAI.setSkipEmission(&MI); + MAI.setSkipEmission(CBr); + if (&MBB.front() == &MI && &MBB.back() == CBr) + MAI.MBBsToSkip.insert(&MBB); + } + } +} + struct SPIRV::ModuleAnalysisInfo SPIRVModuleAnalysis::MAI; void SPIRVModuleAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { @@ -305,7 +371,9 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) { setBaseInfo(M); - // TODO: Process type/const/global var/func decl instructions, number their + processSwitches(M, MAI, MMI); + + // Process type/const/global var/func decl instructions, number their // destination registers from 0 to N, collect Extensions and Capabilities. 
processDefInstrs(M); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index 585868909d28..9bcdf6e9ae2a 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -52,6 +52,9 @@ struct ModuleAnalysisInfo { SPIRV::AddressingModel Addr; SPIRV::SourceLanguage SrcLang; unsigned SrcLangVersion; + StringSet<> SrcExt; + // Maps ExtInstSet to corresponding ID register. + DenseMap<unsigned, Register> ExtInstSetMap; // Contains the list of all global OpVariables in the module. SmallVector<MachineInstr *, 4> GlobalVarList; // Maps function names to coresponding function ID registers. @@ -59,6 +62,9 @@ struct ModuleAnalysisInfo { // The set contains machine instructions which are necessary // for correct MIR but will not be emitted in function bodies. DenseSet<MachineInstr *> InstrsToDelete; + // The set contains machine basic blocks which are necessary + // for correct MIR but will not be emitted. + DenseSet<MachineBasicBlock *> MBBsToSkip; // The table contains global aliases of local registers for each machine // function. The aliases are used to substitute local registers during // code emission. 
@@ -75,6 +81,7 @@ struct ModuleAnalysisInfo { assert(FuncReg != FuncNameMap.end() && "Cannot find function Id"); return FuncReg->second; } + Register getExtInstSetReg(unsigned SetNum) { return ExtInstSetMap[SetNum]; } InstrList &getMSInstrs(unsigned MSType) { return MS[MSType]; } void setSkipEmission(MachineInstr *MI) { InstrsToDelete.insert(MI); } bool getSkipEmission(const MachineInstr *MI) { @@ -123,7 +130,6 @@ public: private: void setBaseInfo(const Module &M); - template <typename T> void collectTypesConstsVars(); void collectGlobalEntities( const std::vector<SPIRV::DTSortableEntry *> &DepsGraph, SPIRV::ModuleSectionType MSType, diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 687f84046650..e620226dcc7a 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -39,11 +39,58 @@ public: }; } // namespace -static bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { - if (MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && - MI.getIntrinsicID() == IntrinsicID) - return true; - return false; +static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + DenseMap<MachineInstr *, Register> RegsAlreadyAddedToDT; + SmallVector<MachineInstr *, 10> ToErase, ToEraseComposites; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isSpvIntrinsic(MI, Intrinsic::spv_track_constant)) + continue; + ToErase.push_back(&MI); + auto *Const = + cast<Constant>(cast<ConstantAsMetadata>( + MI.getOperand(3).getMetadata()->getOperand(0)) + ->getValue()); + if (auto *GV = dyn_cast<GlobalValue>(Const)) { + Register Reg = GR->find(GV, &MF); + if (!Reg.isValid()) + GR->add(GV, &MF, MI.getOperand(2).getReg()); + else + RegsAlreadyAddedToDT[&MI] = Reg; + } else { + Register Reg = 
GR->find(Const, &MF); + if (!Reg.isValid()) { + if (auto *ConstVec = dyn_cast<ConstantDataVector>(Const)) { + auto *BuildVec = MRI.getVRegDef(MI.getOperand(2).getReg()); + assert(BuildVec && + BuildVec->getOpcode() == TargetOpcode::G_BUILD_VECTOR); + for (unsigned i = 0; i < ConstVec->getNumElements(); ++i) + GR->add(ConstVec->getElementAsConstant(i), &MF, + BuildVec->getOperand(1 + i).getReg()); + } + GR->add(Const, &MF, MI.getOperand(2).getReg()); + } else { + RegsAlreadyAddedToDT[&MI] = Reg; + // This MI is unused and will be removed. If the MI uses + // const_composite, it will be unused and should be removed too. + assert(MI.getOperand(2).isReg() && "Reg operand is expected"); + MachineInstr *SrcMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + if (SrcMI && isSpvIntrinsic(*SrcMI, Intrinsic::spv_const_composite)) + ToEraseComposites.push_back(SrcMI); + } + } + } + } + for (MachineInstr *MI : ToErase) { + Register Reg = MI->getOperand(2).getReg(); + if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end()) + Reg = RegsAlreadyAddedToDT[MI]; + MRI.replaceRegWith(MI->getOperand(0).getReg(), Reg); + MI->eraseFromParent(); + } + for (MachineInstr *MI : ToEraseComposites) + MI->eraseFromParent(); } static void foldConstantsIntoIntrinsics(MachineFunction &MF) { @@ -120,6 +167,7 @@ static SPIRVType *propagateSPIRVType(MachineInstr *MI, SPIRVGlobalRegistry *GR, } case TargetOpcode::G_TRUNC: case TargetOpcode::G_ADDRSPACE_CAST: + case TargetOpcode::G_PTR_ADD: case TargetOpcode::COPY: { MachineOperand &Op = MI->getOperand(1); MachineInstr *Def = Op.isReg() ? MRI.getVRegDef(Op.getReg()) : nullptr; @@ -308,6 +356,22 @@ static void processInstrsWithTypeFolding(MachineFunction &MF, processInstr(MI, MIB, MRI, GR); } } + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + // We need to rewrite dst types for ASSIGN_TYPE instrs to be able + // to perform tblgen'erated selection and we can't do that on Legalizer + // as it operates on gMIR only. 
+ if (MI.getOpcode() != SPIRV::ASSIGN_TYPE) + continue; + Register SrcReg = MI.getOperand(1).getReg(); + if (!isTypeFoldingSupported(MRI.getVRegDef(SrcReg)->getOpcode())) + continue; + Register DstReg = MI.getOperand(0).getReg(); + if (MRI.getType(DstReg).isVector()) + MRI.setRegClass(DstReg, &SPIRV::IDRegClass); + MRI.setType(DstReg, LLT::scalar(32)); + } + } } static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, @@ -421,6 +485,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { SPIRVGlobalRegistry *GR = ST.getSPIRVGlobalRegistry(); GR->setCurrentFunc(MF); MachineIRBuilder MIB(MF); + addConstantsToTrack(MF, GR); foldConstantsIntoIntrinsics(MF); insertBitcasts(MF, GR, MIB); generateAssignInstrs(MF, GR, MIB); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp new file mode 100644 index 000000000000..13c3c12c1b41 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -0,0 +1,288 @@ +//===-- SPIRVPrepareFunctions.cpp - modify function signatures --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass modifies function signatures containing aggregate arguments +// and/or return value. Also it substitutes some llvm intrinsic calls by +// function calls, generating these functions as the translator does. +// +// NOTE: this pass is a module-level one due to the necessity to modify +// GVs/functions. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPIRV.h"
+#include "SPIRVTargetMachine.h"
+#include "SPIRVUtils.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeSPIRVPrepareFunctionsPass(PassRegistry &);
+}
+
+namespace {
+
+// Module pass that
+//  1. redirects calls to the fshl/fshr and umul.with.overflow intrinsics to
+//     helper functions generated inside the module, and
+//  2. replaces every function whose signature mentions an aggregate type with
+//     a clone that uses i32 in those positions.
+class SPIRVPrepareFunctions : public ModulePass {
+  // Returns F itself when its signature contains no aggregate type, otherwise
+  // the newly created clone that takes over F's name and uses.
+  Function *processFunctionSignature(Function *F);
+
+public:
+  static char ID;
+  SPIRVPrepareFunctions() : ModulePass(ID) {
+    initializeSPIRVPrepareFunctionsPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+
+  StringRef getPassName() const override { return "SPIRV prepare functions"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    ModulePass::getAnalysisUsage(AU);
+  }
+};
+
+} // namespace
+
+char SPIRVPrepareFunctions::ID = 0;
+
+INITIALIZE_PASS(SPIRVPrepareFunctions, "prepare-functions",
+                "SPIRV prepare functions", false, false)
+
+// Clones F into a function whose aggregate return/argument types are replaced
+// by i32. The original types are recorded in the "spv.cloned_funcs" named
+// metadata as (index, null value of the original type) pairs, where index -1
+// stands for the return type. All users of F are redirected to the clone; F
+// itself stays in the module and is left for the caller to dispose of.
+Function *SPIRVPrepareFunctions::processFunctionSignature(Function *F) {
+  IRBuilder<> B(F->getContext());
+
+  bool IsRetAggr = F->getReturnType()->isAggregateType();
+  bool HasAggrArg =
+      std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) {
+        return Arg.getType()->isAggregateType();
+      });
+  bool DoClone = IsRetAggr || HasAggrArg;
+  if (!DoClone)
+    return F;
+
+  // Build the new signature, remembering every position that was changed.
+  SmallVector<std::pair<int, Type *>, 4> ChangedTypes;
+  Type *RetType = IsRetAggr ? B.getInt32Ty() : F->getReturnType();
+  if (IsRetAggr)
+    ChangedTypes.push_back(std::pair<int, Type *>(-1, F->getReturnType()));
+  SmallVector<Type *, 4> ArgTypes;
+  for (const auto &Arg : F->args()) {
+    if (Arg.getType()->isAggregateType()) {
+      ArgTypes.push_back(B.getInt32Ty());
+      ChangedTypes.push_back(
+          std::pair<int, Type *>(Arg.getArgNo(), Arg.getType()));
+    } else
+      ArgTypes.push_back(Arg.getType());
+  }
+  FunctionType *NewFTy =
+      FunctionType::get(RetType, ArgTypes, F->getFunctionType()->isVarArg());
+  Function *NewF =
+      Function::Create(NewFTy, F->getLinkage(), F->getName(), *F->getParent());
+
+  // Map each old argument onto the new argument at the same position and
+  // carry the argument names over.
+  ValueToValueMapTy VMap;
+  auto NewFArgIt = NewF->arg_begin();
+  for (auto &Arg : F->args()) {
+    StringRef ArgName = Arg.getName();
+    NewFArgIt->setName(ArgName);
+    VMap[&Arg] = &(*NewFArgIt++);
+  }
+  SmallVector<ReturnInst *, 8> Returns;
+
+  CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+                    Returns);
+  NewF->takeName(F);
+
+  // Record the original types: the first operand is the function name, each
+  // following operand is an (index, null value of original type) node.
+  NamedMDNode *FuncMD =
+      F->getParent()->getOrInsertNamedMetadata("spv.cloned_funcs");
+  SmallVector<Metadata *, 2> MDArgs;
+  MDArgs.push_back(MDString::get(B.getContext(), NewF->getName()));
+  for (auto &ChangedTyP : ChangedTypes)
+    MDArgs.push_back(MDNode::get(
+        B.getContext(),
+        {ConstantAsMetadata::get(B.getInt32(ChangedTyP.first)),
+         ValueAsMetadata::get(Constant::getNullValue(ChangedTyP.second))}));
+  MDNode *ThisFuncMD = MDNode::get(B.getContext(), MDArgs);
+  FuncMD->addOperand(ThisFuncMD);
+
+  // Redirect the users. Early-inc iteration is required because replacing the
+  // use modifies F's use list while it is being walked.
+  for (auto *U : make_early_inc_range(F->users())) {
+    if (auto *CI = dyn_cast<CallInst>(U))
+      CI->mutateFunctionType(NewF->getFunctionType());
+    U->replaceUsesOfWith(F, NewF);
+  }
+  return NewF;
+}
+
+// Returns the helper name used for an intrinsic call: the intrinsic's name
+// with every '.' replaced by '_' and prefixed with "spirv.".
+std::string lowerLLVMIntrinsicName(IntrinsicInst *II) {
+  Function *IntrinsicFunc = II->getCalledFunction();
+  assert(IntrinsicFunc && "Missing function");
+  std::string FuncName = IntrinsicFunc->getName().str();
+  std::replace(FuncName.begin(), FuncName.end(), '.', '_');
+  FuncName = "spirv." + FuncName;
+  return FuncName;
+}
+
+// Returns the function called Name in M when it already has the requested
+// (non-vararg) type. Otherwise creates a new externally linked function of
+// that type with the SPIR_FUNC calling convention, copying the dso_local flag
+// from a same-named function of a different type if there is one.
+static Function *getOrCreateFunction(Module *M, Type *RetTy,
+                                     ArrayRef<Type *> ArgTypes,
+                                     StringRef Name) {
+  FunctionType *FT = FunctionType::get(RetTy, ArgTypes, false);
+  Function *F = M->getFunction(Name);
+  if (F && F->getFunctionType() == FT)
+    return F;
+  Function *NewF = Function::Create(FT, GlobalValue::ExternalLinkage, Name, M);
+  if (F)
+    NewF->setDSOLocal(F->isDSOLocal());
+  NewF->setCallingConv(CallingConv::SPIR_FUNC);
+  return NewF;
+}
+
+// Redirects a call to llvm.fshl/llvm.fshr to a helper function that computes
+// the funnel shift with plain shifts, generating the helper body on first
+// use.
+static void lowerFunnelShifts(Module *M, IntrinsicInst *FSHIntrinsic) {
+  // Get a separate function - otherwise, we'd have to rework the CFG of the
+  // current one. Then simply replace the intrinsic uses with a call to the new
+  // function.
+  // Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c)
+  FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
+  Type *FSHRetTy = FSHFuncTy->getReturnType();
+  const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
+  Function *FSHFunc =
+      getOrCreateFunction(M, FSHRetTy, FSHFuncTy->params(), FuncName);
+
+  // The helper body has already been generated by an earlier call.
+  if (!FSHFunc->empty()) {
+    FSHIntrinsic->setCalledFunction(FSHFunc);
+    return;
+  }
+  BasicBlock *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHFunc);
+  IRBuilder<> IRB(RotateBB);
+  Type *Ty = FSHFunc->getReturnType();
+  // Build the actual funnel shift rotate logic.
+  // In the comments, "int" is used interchangeably with "vector of int
+  // elements".
+  FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty);
+  Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty;
+  unsigned BitWidth = IntTy->getIntegerBitWidth();
+  ConstantInt *BitWidthConstant = IRB.getInt({BitWidth, BitWidth});
+  Value *BitWidthForInsts =
+      VectorTy
+          ? IRB.CreateVectorSplat(VectorTy->getNumElements(), BitWidthConstant)
+          : BitWidthConstant;
+  Value *RotateModVal =
+      IRB.CreateURem(/*Rotate*/ FSHFunc->getArg(2), BitWidthForInsts);
+  const bool IsFshr = FSHIntrinsic->getIntrinsicID() == Intrinsic::fshr;
+  Value *FirstShift = nullptr, *SecShift = nullptr;
+  if (IsFshr) {
+    // Shift the less significant number right, the "rotate" number of bits
+    // will be 0-filled on the left as a result of this regular shift.
+    FirstShift = IRB.CreateLShr(FSHFunc->getArg(1), RotateModVal);
+  } else {
+    // Shift the more significant number left, the "rotate" number of bits
+    // will be 0-filled on the right as a result of this regular shift.
+    FirstShift = IRB.CreateShl(FSHFunc->getArg(0), RotateModVal);
+  }
+  // We want the "rotate" number of the more significant int's LSBs (MSBs) to
+  // occupy the leftmost (rightmost) "0 space" left by the previous operation.
+  // Therefore, subtract the "rotate" number from the integer bitsize...
+  Value *SubRotateVal = IRB.CreateSub(BitWidthForInsts, RotateModVal);
+  if (IsFshr) {
+    // ...and left-shift the more significant int by this number, zero-filling
+    // the LSBs.
+    SecShift = IRB.CreateShl(FSHFunc->getArg(0), SubRotateVal);
+  } else {
+    // ...and right-shift the less significant int by this number, zero-filling
+    // the MSBs.
+    SecShift = IRB.CreateLShr(FSHFunc->getArg(1), SubRotateVal);
+  }
+  // A bitwise OR of the two shifted ints yields the funnel shift result.
+  Value *Funnel = IRB.CreateOr(FirstShift, SecShift);
+  // When the rotate amount is zero, SubRotateVal equals the bit width and the
+  // second shift is poison (a shift by >= the bit width is not defined). The
+  // correct result is then the unshifted operand, which is exactly what
+  // FirstShift holds, so select it instead of the poisoned OR.
+  Value *IsZeroRotate = IRB.CreateICmpEQ(
+      RotateModVal, Constant::getNullValue(RotateModVal->getType()));
+  IRB.CreateRet(IRB.CreateSelect(IsZeroRotate, FirstShift, Funnel));
+
+  FSHIntrinsic->setCalledFunction(FSHFunc);
+}
+
+// Generates the body of the helper that replaces llvm.umul.with.overflow:
+// it returns {A * B, overflow bit}. Does nothing if UMulFunc has a body.
+static void buildUMulWithOverflowFunc(Module *M, Function *UMulFunc) {
+  // The function body is already created.
+  if (!UMulFunc->empty())
+    return;
+
+  BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", UMulFunc);
+  IRBuilder<> IRB(EntryBB);
+  // Build the actual unsigned multiplication logic with the overflow
+  // indication. Do unsigned multiplication Mul = A * B. Then check
+  // if unsigned division Div = Mul / A is not equal to B. If so,
+  // then overflow has happened.
+  Value *A = UMulFunc->getArg(0);
+  Value *B = UMulFunc->getArg(1);
+  Type *OpTy = A->getType();
+  // The product is expected to wrap, so it must not carry the nuw flag:
+  // "mul nuw" would be poison in exactly the case being detected.
+  Value *Mul = IRB.CreateMul(A, B);
+  // A == 0 can never overflow, but dividing by it is undefined. Divide by 1
+  // in that case and mask the comparison result out below.
+  Value *AIsZero = IRB.CreateICmpEQ(A, Constant::getNullValue(OpTy));
+  Value *SafeA = IRB.CreateSelect(AIsZero, ConstantInt::get(OpTy, 1), A);
+  Value *Div = IRB.CreateUDiv(Mul, SafeA);
+  Value *Overflow =
+      IRB.CreateAnd(IRB.CreateNot(AIsZero), IRB.CreateICmpNE(Div, B));
+
+  // The umul.with.overflow intrinsic returns a structure, where the first
+  // element is the multiplication result, and the second is an overflow bit.
+  Type *StructTy = UMulFunc->getReturnType();
+  Value *Agg = IRB.CreateInsertValue(UndefValue::get(StructTy), Mul, {0});
+  Value *Res = IRB.CreateInsertValue(Agg, Overflow, {1});
+  IRB.CreateRet(Res);
+}
+
+// Redirects a call to llvm.umul.with.overflow to a helper function with the
+// same signature, generating the helper body on first use.
+static void lowerUMulWithOverflow(Module *M, IntrinsicInst *UMulIntrinsic) {
+  // Get a separate function - otherwise, we'd have to rework the CFG of the
+  // current one. Then simply replace the intrinsic uses with a call to the new
+  // function.
+  FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType();
+  Type *UMulRetTy = UMulFuncTy->getReturnType();
+  const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic);
+  Function *UMulFunc =
+      getOrCreateFunction(M, UMulRetTy, UMulFuncTy->params(), FuncName);
+  buildUMulWithOverflowFunc(M, UMulFunc);
+  UMulIntrinsic->setCalledFunction(UMulFunc);
+}
+
+// Clears the tail-call marker on every call in F and redirects calls to the
+// supported intrinsics to their helper functions. Returns true if anything
+// in F was modified.
+static bool substituteIntrinsicCalls(Module *M, Function *F) {
+  bool Changed = false;
+  for (BasicBlock &BB : *F) {
+    for (Instruction &I : BB) {
+      auto *Call = dyn_cast<CallInst>(&I);
+      if (!Call)
+        continue;
+      // Same effect as an unconditional setTailCall(false), but lets us tell
+      // whether the instruction was actually touched.
+      if (Call->getTailCallKind() != CallInst::TCK_None) {
+        Call->setTailCall(false);
+        Changed = true;
+      }
+      Function *CF = Call->getCalledFunction();
+      if (!CF || !CF->isIntrinsic())
+        continue;
+      auto *II = cast<IntrinsicInst>(Call);
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::fshl:
+      case Intrinsic::fshr:
+        lowerFunnelShifts(M, II);
+        Changed = true;
+        break;
+      case Intrinsic::umul_with_overflow:
+        lowerUMulWithOverflow(M, II);
+        Changed = true;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+  return Changed;
+}
+
+// Runs both preparation steps over the module and reports whether the IR was
+// modified.
+bool SPIRVPrepareFunctions::runOnModule(Module &M) {
+  bool Changed = false;
+  for (Function &F : M)
+    Changed |= substituteIntrinsicCalls(&M, &F);
+
+  // Snapshot the function list first: processFunctionSignature() adds new
+  // functions to the module and definitions are erased below.
+  std::vector<Function *> FuncsWorklist;
+  for (auto &F : M)
+    FuncsWorklist.push_back(&F);
+
+  for (auto *Func : FuncsWorklist) {
+    Function *F = processFunctionSignature(Func);
+
+    // Creating a clone changes the module whether or not the original is a
+    // declaration.
+    bool CreatedNewF = F != Func;
+    Changed |= CreatedNewF;
+
+    // Replaced declarations are kept; only superseded definitions are erased.
+    if (CreatedNewF && !Func->isDeclaration())
+      Func->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+ModulePass *llvm::createSPIRVPrepareFunctionsPass() {
+  return new SPIRVPrepareFunctions();
+}
diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp index cdf3a160f373..00549c7b5768 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp @@ -46,8 +46,7 @@ SPIRVSubtarget::SPIRVSubtarget(const Triple &TT, const std::string &CPU, PointerSize(computePointerSize(TT)), SPIRVVersion(0), InstrInfo(), FrameLowering(initSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) { GR = std::make_unique<SPIRVGlobalRegistry>(PointerSize); - CallLoweringInfo = - std::make_unique<SPIRVCallLowering>(TLInfo, *this, GR.get()); + CallLoweringInfo = std::make_unique<SPIRVCallLowering>(TLInfo, GR.get()); Legalizer = std::make_unique<SPIRVLegalizerInfo>(*this); RegBankInfo = std::make_unique<SPIRVRegisterBankInfo>(); InstSelector.reset( diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index f7c88a5c6d4a..7f5f14dc3ce8 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -140,7 +140,10 @@ TargetPassConfig *SPIRVTargetMachine::createPassConfig(PassManagerBase &PM) { return new SPIRVPassConfig(*this, PM); } -void SPIRVPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +void SPIRVPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); +
addPass(createSPIRVPrepareFunctionsPass()); +} void SPIRVPassConfig::addISelPrepare() { addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>())); diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index b92dc12735f8..15671ef3e512 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -45,6 +45,14 @@ static size_t getPaddedLen(const StringRef &Str) { return (Len % 4 == 0) ? Len : Len + (4 - (Len % 4)); } +void addStringImm(const StringRef &Str, MCInst &Inst) { + const size_t PaddedLen = getPaddedLen(Str); + for (unsigned i = 0; i < PaddedLen; i += 4) { + // Add an operand for the 32-bits of chars or padding. + Inst.addOperand(MCOperand::createImm(convertCharsToWord(Str, i))); + } +} + void addStringImm(const StringRef &Str, MachineInstrBuilder &MIB) { const size_t PaddedLen = getPaddedLen(Str); for (unsigned i = 0; i < PaddedLen; i += 4) { @@ -182,6 +190,24 @@ SPIRV::MemorySemantics getMemSemanticsForStorageClass(SPIRV::StorageClass SC) { } } +SPIRV::MemorySemantics getMemSemantics(AtomicOrdering Ord) { + switch (Ord) { + case AtomicOrdering::Acquire: + return SPIRV::MemorySemantics::Acquire; + case AtomicOrdering::Release: + return SPIRV::MemorySemantics::Release; + case AtomicOrdering::AcquireRelease: + return SPIRV::MemorySemantics::AcquireRelease; + case AtomicOrdering::SequentiallyConsistent: + return SPIRV::MemorySemantics::SequentiallyConsistent; + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + case AtomicOrdering::NotAtomic: + default: + return SPIRV::MemorySemantics::None; + } +} + MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, const MachineRegisterInfo *MRI) { MachineInstr *ConstInstr = MRI->getVRegDef(ConstReg); @@ -202,6 +228,11 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) { return 
MI->getOperand(1).getCImm()->getValue().getZExtValue(); } +bool isSpvIntrinsic(MachineInstr &MI, Intrinsic::ID IntrinsicID) { + return MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && + MI.getIntrinsicID() == IntrinsicID; +} + Type *getMDOperandAsType(const MDNode *N, unsigned I) { return cast<ValueAsMetadata>(N->getOperand(I))->getType(); } diff --git a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h index ffa82c9c1fe4..35e24b076570 100644 --- a/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/contrib/llvm-project/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -32,6 +32,7 @@ class SPIRVInstrInfo; // Add the given string as a series of integer operand, inserting null // terminators and padding to make sure the operands all have 32-bit // little-endian words. +void addStringImm(const llvm::StringRef &Str, llvm::MCInst &Inst); void addStringImm(const llvm::StringRef &Str, llvm::MachineInstrBuilder &MIB); void addStringImm(const llvm::StringRef &Str, llvm::IRBuilder<> &B, std::vector<llvm::Value *> &Args); @@ -67,6 +68,8 @@ llvm::SPIRV::StorageClass addressSpaceToStorageClass(unsigned AddrSpace); llvm::SPIRV::MemorySemantics getMemSemanticsForStorageClass(llvm::SPIRV::StorageClass SC); +llvm::SPIRV::MemorySemantics getMemSemantics(llvm::AtomicOrdering Ord); + // Find def instruction for the given ConstReg, walking through // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def // of OpConstant instruction. @@ -78,6 +81,9 @@ getDefInstrMaybeConstant(llvm::Register &ConstReg, uint64_t getIConstVal(llvm::Register ConstReg, const llvm::MachineRegisterInfo *MRI); +// Check if MI is a SPIR-V specific intrinsic call. +bool isSpvIntrinsic(llvm::MachineInstr &MI, llvm::Intrinsic::ID IntrinsicID); + // Get type of i-th operand of the metadata node. 
llvm::Type *getMDOperandAsType(const llvm::MDNode *N, unsigned I); #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 1138788ac7fa..1f8837eb0194 100644 --- a/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -24,10 +24,10 @@ Target &llvm::getTheSparcelTarget() { } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetInfo() { - RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(getTheSparcTarget(), "sparc", - "Sparc", "Sparc"); - RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y( + RegisterTarget<Triple::sparc, /*HasJIT=*/false> X(getTheSparcTarget(), + "sparc", "Sparc", "Sparc"); + RegisterTarget<Triple::sparcv9, /*HasJIT=*/false> Y( getTheSparcV9Target(), "sparcv9", "Sparc V9", "Sparc"); - RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z( + RegisterTarget<Triple::sparcel, /*HasJIT=*/false> Z( getTheSparcelTarget(), "sparcel", "Sparc LE", "Sparc"); } diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp index 9c73757d7f5c..86eb8365d527 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -28,7 +28,3 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = { const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = { SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D }; - -const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = { - SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, - SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31}; diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h 
b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h index f82c61c0f344..387411942aba 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -27,9 +27,6 @@ namespace SystemZ { const unsigned XPLINK64NumArgFPRs = 4; extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs]; - - const unsigned XPLINK64NumArgVRs = 8; - extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs]; } // end namespace SystemZ class SystemZCCState : public CCState { @@ -205,41 +202,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT, return false; } -inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { - ArrayRef<MCPhysReg> RegList; - - switch (LocVT.SimpleTy) { - case MVT::i64: - RegList = SystemZ::XPLINK64ArgGPRs; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - RegList = SystemZ::XPLINK64ArgVRs; - break; - case MVT::f32: - case MVT::f64: - case MVT::f128: - RegList = SystemZ::XPLINK64ArgFPRs; - break; - default: - return false; - } - - unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList); - // Every time we can allocate a register, allocate on the stack. 
- if (UnallocatedRegisterIndex < RegList.size()) - State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8)); - - return false; -} - inline bool RetCC_SystemZ_Error(unsigned &, MVT &, MVT &, CCValAssign::LocInfo &, ISD::ArgFlagsTy &, CCState &) { diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td index fdd82a01f211..29b4a26736b2 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -221,9 +221,10 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // XPLINK64 ABI compliant code widens integral types smaller than i64 // to i64 before placing the parameters either on the stack or in registers. CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, - // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRS. - CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>, - CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>, + // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs. + // Although we assign the f32 vararg to be bitcast, it will first be promoted + // to an f64 within convertValVTToLocVT(). + CCIfType<[f32, f64], CCIfNotFixed<CCBitConvertToType<i64>>>, // long double, can only be passed in GPR2 and GPR3, if available, // hence R2Q CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>, @@ -246,34 +247,29 @@ def CC_SystemZ_XPLINK64 : CallingConv<[ // The first 3 integer arguments are passed in registers R1D-R3D. // The rest will be passed in the user area. The address offset of the user // area can be found in register R4D. - CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>, - CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>, + CCIfType<[i64], CCAssignToRegAndStack<[R1D, R2D, R3D], 8, 8>>, - // The first 8 named vector arguments are passed in V24-V31. 
Sub-128 vectors + // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors // are passed in the same way, but they're widened to one of these types // during type legalization. CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>, - CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>, + CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>, CCIfSubtarget<"hasVector()", CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfFixed<CCAssignToReg<[V24, V25, V26, V27, - V28, V29, V30, V31]>>>>, + CCIfFixed<CCAssignToRegAndStack<[V24, V25, V26, V27, + V28, V29, V30, V31], 16, 8>>>>, - // The first 4 named float and double arguments are passed in registers FPR0-FPR6. - // The rest will be passed in the user area. + // The first 4 named float and double arguments are passed in registers + // FPR0-FPR6. The rest will be passed in the user area. CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, - CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>, - CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>, - CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>, + CCIfType<[f32], CCIfFixed<CCAssignToRegAndStack<[F0S, F2S, F4S, F6S], 4, 8>>>, + CCIfType<[f64], CCIfFixed<CCAssignToRegAndStack<[F0D, F2D, F4D, F6D], 8, 8>>>, + // The first 2 long double arguments are passed in register FPR0/FPR2 // and FPR4/FPR6. The rest will be passed in the user area. CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>, - CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>, - CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>, + CCIfType<[f128], CCIfFixed<CCAssignToRegAndStack<[F0Q, F4Q], 16, 8>>>, // Other arguments are passed in 8-byte-aligned 8-byte stack slots. 
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 975eb8862e82..d943507b4112 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -911,6 +911,54 @@ SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering() XPLINKSpillOffsetTable[I].Offset; } +// Checks if the function is a potential candidate for being a XPLeaf routine. +static bool isXPLeafCandidate(const MachineFunction &MF) { + const MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + auto *Regs = + static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); + + // If function calls other functions including alloca, then it is not a XPLeaf + // routine. + if (MFFrame.hasCalls()) + return false; + + // If the function has var Sized Objects, then it is not a XPLeaf routine. + if (MFFrame.hasVarSizedObjects()) + return false; + + // If the function adjusts the stack, then it is not a XPLeaf routine. + if (MFFrame.adjustsStack()) + return false; + + // If function modifies the stack pointer register, then it is not a XPLeaf + // routine. + if (MRI.isPhysRegModified(Regs->getStackPointerRegister())) + return false; + + // If function modifies the ADA register, then it is not a XPLeaf routine. + if (MRI.isPhysRegModified(Regs->getAddressOfCalleeRegister())) + return false; + + // If function modifies the return address register, then it is not a XPLeaf + // routine. + if (MRI.isPhysRegModified(Regs->getReturnFunctionAddressRegister())) + return false; + + // If the backchain pointer should be stored, then it is not a XPLeaf routine. 
+ if (MF.getFunction().hasFnAttribute("backchain")) + return false; + + // If function acquires its own stack frame, then it is not a XPLeaf routine. + // At the time this function is called, only slots for local variables are + // allocated, so this is a very rough estimate. + if (MFFrame.estimateStackSize(MF) > 0) + return false; + + return true; +} + bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { @@ -920,6 +968,18 @@ bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots( auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); auto &GRRegClass = SystemZ::GR64BitRegClass; + // At this point, the result of isXPLeafCandidate() is not accurate because + // the size of the save area has not yet been determined. If + // isXPLeafCandidate() indicates a potential leaf function, and there are no + // callee-save registers, then it is indeed a leaf function, and we can early + // exit. + // TODO: It is possible for leaf functions to use callee-saved registers. + // It can use the 0-2k range between R4 and the caller's stack frame without + // acquiring its own stack frame. + bool IsLeaf = CSI.empty() && isXPLeafCandidate(MF); + if (IsLeaf) + return true; + // For non-leaf functions: // - the address of callee (entry point) register R6 must be saved CSI.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister())); @@ -1137,16 +1197,16 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); MachineFrameInfo &MFFrame = MF.getFrameInfo(); MachineInstr *StoreInstr = nullptr; + + determineFrameLayout(MF); + bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. 
DebugLoc DL; uint64_t Offset = 0; - // TODO: Support leaf functions; only add size of save+reserved area when - // function is non-leaf. - MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize()); - uint64_t StackSize = MFFrame.getStackSize(); + const uint64_t StackSize = MFFrame.getStackSize(); if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. @@ -1213,8 +1273,8 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, // Mark the FramePtr as live at the beginning of every block except // the entry block. (We'll have marked R8 as live on entry when // saving the GPRs.) - for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I) - I->addLiveIn(Regs.getFramePointerRegister()); + for (MachineBasicBlock &B : llvm::drop_begin(MF)) + B.addLiveIn(Regs.getFramePointerRegister()); } } @@ -1321,3 +1381,32 @@ void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized( // Setup stack frame offset MFFrame.setOffsetAdjustment(Regs.getStackPointerBias()); } + +// Determines the size of the frame, and creates the deferred spill objects. +void SystemZXPLINKFrameLowering::determineFrameLayout( + MachineFunction &MF) const { + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>(); + auto *Regs = + static_cast<SystemZXPLINK64Registers *>(Subtarget.getSpecialRegisters()); + + uint64_t StackSize = MFFrame.getStackSize(); + if (StackSize == 0) + return; + + // Add the size of the register save area and the reserved area to the size. + StackSize += Regs->getCallFrameSize(); + MFFrame.setStackSize(StackSize); + + // We now know the stack size. Create the fixed spill stack objects for the + // register save area now. This has no impact on the stack frame layout, as + // this is already computed. However, it makes sure that all callee saved + // registers have a valid frame index assigned. 
+ const unsigned RegSize = MF.getDataLayout().getPointerSize(); + for (auto &CS : MFFrame.getCalleeSavedInfo()) { + int Offset = RegSpillOffsets[CS.getReg()]; + if (Offset >= 0) + CS.setFrameIdx( + MFFrame.CreateFixedSpillStackObject(RegSize, Offset - StackSize)); + } +} diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index bec83a9457e0..95f30e3c0d99 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -134,6 +134,8 @@ public: void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const override; + + void determineFrameLayout(MachineFunction &MF) const; }; } // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 42c1c77f14e4..ac4531262187 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1404,8 +1404,12 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::BCvt: { assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); - assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 || - VA.getValVT() == MVT::f128); + assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || + VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); + // For an f32 vararg we need to first promote it to an f64 and then + // bitcast it to an i64. + if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) + Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 ? 
MVT::v2i64 : VA.getLocVT(); diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp index 94ebb59c4c77..46bb85606a62 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -418,7 +418,9 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, if (MI.getOpcode() == VE::LDrii || // I64 MI.getOpcode() == VE::LDLSXrii || // I32 MI.getOpcode() == VE::LDUrii || // F32 - MI.getOpcode() == VE::LDQrii // F128 (pseudo) + MI.getOpcode() == VE::LDQrii || // F128 (pseudo) + MI.getOpcode() == VE::LDVMrii || // VM (pseudo) + MI.getOpcode() == VE::LDVM512rii // VM512 (pseudo) ) { if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && @@ -437,10 +439,12 @@ unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, /// any side effects other than storing to the stack slot. unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { - if (MI.getOpcode() == VE::STrii || // I64 - MI.getOpcode() == VE::STLrii || // I32 - MI.getOpcode() == VE::STUrii || // F32 - MI.getOpcode() == VE::STQrii // F128 (pseudo) + if (MI.getOpcode() == VE::STrii || // I64 + MI.getOpcode() == VE::STLrii || // I32 + MI.getOpcode() == VE::STUrii || // F32 + MI.getOpcode() == VE::STQrii || // F128 (pseudo) + MI.getOpcode() == VE::STVMrii || // VM (pseudo) + MI.getOpcode() == VE::STVM512rii // VM512 (pseudo) ) { if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && @@ -496,6 +500,20 @@ void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(0) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO); + } else if (RC == &VE::VMRegClass) { + BuildMI(MBB, I, DL, get(VE::STVMrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + 
.addMemOperand(MMO); + } else if (VE::VM512RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::STVM512rii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); } else report_fatal_error("Can't store this register to stack slot"); } @@ -539,6 +557,18 @@ void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0) .addImm(0) .addMemOperand(MMO); + } else if (RC == &VE::VMRegClass) { + BuildMI(MBB, I, DL, get(VE::LDVMrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); + } else if (VE::VM512RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::LDVM512rii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); } else report_fatal_error("Can't load this register from stack slot"); } diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td index 71199717a3a2..0b2f5039e3f3 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrPatternsVec.td @@ -25,6 +25,20 @@ def: Pat<(i64 (repl_i32 i32:$val)), (zero_f32 (i2l $val)), (SLLri (i2l $val), 32))>; +///// Mask Load & Store ///// + +// Store for v256i1, v512i1 are implemented in 2 ways. These STVM/STVM512 +// pseudo instruction is used for frameindex related load/store instructions. +// Custom Lowering is used for other load/store instructions. 
+ +def : Pat<(v256i1 (load ADDRrii:$addr)), + (LDVMrii ADDRrii:$addr)>; +def : Pat<(v512i1 (load ADDRrii:$addr)), + (LDVM512rii ADDRrii:$addr)>; +def : Pat<(store v256i1:$vx, ADDRrii:$addr), + (STVMrii ADDRrii:$addr, $vx)>; +def : Pat<(store v512i1:$vx, ADDRrii:$addr), + (STVM512rii ADDRrii:$addr, $vx)>; multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp, SDNodeXForm ImmCast, OutPatFrag SuperRegCast> { diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td index 4a8476f7288a..327ad9ceacc5 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrVec.td @@ -2,6 +2,33 @@ // Vector Instructions //===----------------------------------------------------------------------===// +// Pseudo instructions for VM/VM512 spill/restore +// +// These pseudo instructions are used for only spill/restore since +// InlineSpiller assumes storeRegToStackSlot/loadRegFromStackSlot +// functions emit only single instruction. Those functions emit a +// single store/load instruction or one of these pseudo store/load +// instructions. +// +// Specifies hasSideEffects = 0 to disable UnmodeledSideEffects. 
+ +let mayLoad = 1, hasSideEffects = 0 in { +def LDVMrii : Pseudo< + (outs VM:$vmx), (ins MEMrii:$addr), + "# pseudo ldvm $vmx, $addr", []>; +def LDVM512rii : Pseudo< + (outs VM512:$vmx), (ins MEMrii:$addr), + "# pseudo ldvm512 $vmx, $addr", []>; +} +let mayStore = 1, hasSideEffects = 0 in { +def STVMrii : Pseudo< + (outs), (ins MEMrii:$addr, VM:$vmx), + "# pseudo stvm $addr, $vmx", []>; +def STVM512rii : Pseudo< + (outs), (ins MEMrii:$addr, VM512:$vmx), + "# pseudo stvm512 $addr, $vmx", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions for VM512 modifications //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp index f334af128162..397ea09c9a02 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -180,6 +180,16 @@ class EliminateFrameIndex { int FIOperandNum); void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset, int FIOperandNum); + // Expand and eliminate Frame Index of pseudo STVMrii and LDVMrii. + void processSTVM(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + void processLDVM(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + // Expand and eliminate Frame Index of pseudo STVM512rii and LDVM512rii. 
+ void processSTVM512(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); + void processLDVM512(MachineInstr &MI, Register FrameReg, int64_t Offset, + int FIOperandNum); public: EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, @@ -271,6 +281,185 @@ void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg, replaceFI(MI, FrameReg, Offset, FIOperandNum); } +void EliminateFrameIndex::processSTVM(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::STVMrii); + LLVM_DEBUG(dbgs() << "processSTVM: "; MI.dump()); + + // Original MI is: + // STVMrii frame-index, 0, offset, reg (, memory operand) + // Convert it to: + // SVMi tmp-reg, reg, 0 + // STrii frame-reg, 0, offset, tmp-reg + // SVMi tmp-reg, reg, 1 + // STrii frame-reg, 0, offset+8, tmp-reg + // SVMi tmp-reg, reg, 2 + // STrii frame-reg, 0, offset+16, tmp-reg + // SVMi tmp-reg, reg, 3 + // STrii frame-reg, 0, offset+24, tmp-reg + + prepareReplaceFI(MI, FrameReg, Offset, 24); + + Register SrcReg = MI.getOperand(3).getReg(); + bool isKill = MI.getOperand(3).isKill(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. 
+ Register TmpReg = VE::SX16; + for (int i = 0; i < 3; ++i) { + build(VE::SVMmr, TmpReg).addReg(SrcReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + build(VE::SVMmr, TmpReg).addReg(SrcReg, getKillRegState(isKill)).addImm(3); + MI.setDesc(get(VE::STrii)); + MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processLDVM(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::LDVMrii); + LLVM_DEBUG(dbgs() << "processLDVM: "; MI.dump()); + + // Original MI is: + // LDVMrii reg, frame-index, 0, offset (, memory operand) + // Convert it to: + // LDrii tmp-reg, frame-reg, 0, offset + // LVMir vm, 0, tmp-reg + // LDrii tmp-reg, frame-reg, 0, offset+8 + // LVMir_m vm, 1, tmp-reg, vm + // LDrii tmp-reg, frame-reg, 0, offset+16 + // LVMir_m vm, 2, tmp-reg, vm + // LDrii tmp-reg, frame-reg, 0, offset+24 + // LVMir_m vm, 3, tmp-reg, vm + + prepareReplaceFI(MI, FrameReg, Offset, 24); + + Register DestReg = MI.getOperand(0).getReg(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + unsigned TmpReg = VE::SX16; + for (int i = 0; i < 4; ++i) { + if (i != 3) { + MachineInstr *StMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*StMI, FrameReg, Offset, 1); + Offset += 8; + } else { + // The last LDrii replaces the target instruction. + MI.setDesc(get(VE::LDrii)); + MI.getOperand(0).ChangeToRegister(TmpReg, true); + } + // The first LVM is LVMir; the others are LVMir_m. The last LVM is placed + // right after the target instruction.
+ if (i == 0) + build(VE::LVMir, DestReg).addImm(i).addReg(TmpReg, getKillRegState(true)); + else if (i != 3) + build(VE::LVMir_m, DestReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestReg); + else + BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestReg) + .addImm(3) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestReg); + } + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processSTVM512(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::STVM512rii); + LLVM_DEBUG(dbgs() << "processSTVM512: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 56); + + Register SrcReg = MI.getOperand(3).getReg(); + Register SrcLoReg = getSubReg(SrcReg, VE::sub_vm_odd); + Register SrcHiReg = getSubReg(SrcReg, VE::sub_vm_even); + bool isKill = MI.getOperand(3).isKill(); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + Register TmpReg = VE::SX16; + // store low part of VMP + MachineInstr *LastMI = nullptr; + for (int i = 0; i < 4; ++i) { + LastMI = build(VE::SVMmr, TmpReg).addReg(SrcLoReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + if (isKill) + LastMI->addRegisterKilled(SrcLoReg, &TRI, true); + // store high part of VMP + for (int i = 0; i < 3; ++i) { + build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(i); + MachineInstr *StMI = + build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg( + TmpReg, getKillRegState(true)); + replaceFI(*StMI, FrameReg, Offset, 0); + Offset += 8; + } + LastMI = build(VE::SVMmr, TmpReg).addReg(SrcHiReg).addImm(3); + if (isKill) { + LastMI->addRegisterKilled(SrcHiReg, &TRI, true); + // Add implicit super-register kills to the particular MI. 
+ LastMI->addRegisterKilled(SrcReg, &TRI, true); + } + MI.setDesc(get(VE::STrii)); + MI.getOperand(3).ChangeToRegister(TmpReg, false, false, true); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + +void EliminateFrameIndex::processLDVM512(MachineInstr &MI, Register FrameReg, + int64_t Offset, int FIOperandNum) { + assert(MI.getOpcode() == VE::LDVM512rii); + LLVM_DEBUG(dbgs() << "processLDVM512: "; MI.dump()); + + prepareReplaceFI(MI, FrameReg, Offset, 56); + + Register DestReg = MI.getOperand(0).getReg(); + Register DestLoReg = getSubReg(DestReg, VE::sub_vm_odd); + Register DestHiReg = getSubReg(DestReg, VE::sub_vm_even); + // FIXME: it would be better to scavenge a register here instead of + // reserving SX16 all of the time. + Register TmpReg = VE::SX16; + build(VE::IMPLICIT_DEF, DestReg); + for (int i = 0; i < 4; ++i) { + MachineInstr *LdMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*LdMI, FrameReg, Offset, 1); + build(VE::LVMir_m, DestLoReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestLoReg); + Offset += 8; + } + for (int i = 0; i < 3; ++i) { + MachineInstr *LdMI = + build(VE::LDrii, TmpReg).addReg(FrameReg).addImm(0).addImm(0); + replaceFI(*LdMI, FrameReg, Offset, 1); + build(VE::LVMir_m, DestHiReg) + .addImm(i) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestHiReg); + Offset += 8; + } + MI.setDesc(get(VE::LDrii)); + MI.getOperand(0).ChangeToRegister(TmpReg, true); + BuildMI(*MI.getParent(), std::next(II), DL, get(VE::LVMir_m), DestHiReg) + .addImm(3) + .addReg(TmpReg, getKillRegState(true)) + .addReg(DestHiReg); + replaceFI(MI, FrameReg, Offset, FIOperandNum); +} + void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg, int64_t Offset, int FIOperandNum) { switch (MI.getOpcode()) { @@ -280,6 +469,18 @@ void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg, case VE::LDQrii: processLDQ(MI, FrameReg, Offset, FIOperandNum); return; + case VE::STVMrii: + 
processSTVM(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::LDVMrii: + processLDVM(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::STVM512rii: + processSTVM512(MI, FrameReg, Offset, FIOperandNum); + return; + case VE::LDVM512rii: + processLDVM512(MI, FrameReg, Offset, FIOperandNum); + return; } prepareReplaceFI(MI, FrameReg, Offset); replaceFI(MI, FrameReg, Offset, FIOperandNum); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 9316826e3d92..d7720604d6dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -40,7 +40,7 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) RI(STI.getTargetTriple()) {} bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( - const MachineInstr &MI, AAResults *AA) const { + const MachineInstr &MI) const { switch (MI.getOpcode()) { case WebAssembly::CONST_I32: case WebAssembly::CONST_I64: diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index f45a3792467a..29d700bdf83f 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -43,8 +43,7 @@ public: const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp 
b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index d3ad47147ac8..f9ef45bfb41c 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -49,7 +49,6 @@ class WebAssemblyRegStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<MachineBlockFrequencyInfo>(); @@ -164,15 +163,15 @@ static void queryCallee(const MachineInstr &MI, bool &Read, bool &Write, // Determine whether MI reads memory, writes memory, has side effects, // and/or uses the stack pointer value. -static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, - bool &Write, bool &Effects, bool &StackPointer) { +static void query(const MachineInstr &MI, bool &Read, bool &Write, + bool &Effects, bool &StackPointer) { assert(!MI.isTerminator()); if (MI.isDebugInstr() || MI.isPosition()) return; // Check for loads. - if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA)) + if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad()) Read = true; // Check for stores. @@ -255,9 +254,9 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, } // Test whether Def is safe and profitable to rematerialize. -static bool shouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA, +static bool shouldRematerialize(const MachineInstr &Def, const WebAssemblyInstrInfo *TII) { - return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA); + return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def); } // Identify the definition for this register at this point. 
This is a @@ -311,7 +310,7 @@ static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, - const MachineInstr *Insert, AliasAnalysis &AA, + const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI) { const MachineInstr *DefI = Def->getParent(); @@ -391,7 +390,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, } bool Read = false, Write = false, Effects = false, StackPointer = false; - query(*DefI, AA, Read, Write, Effects, StackPointer); + query(*DefI, Read, Write, Effects, StackPointer); // If the instruction does not access memory and has no side effects, it has // no additional dependencies. @@ -406,7 +405,7 @@ static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, bool InterveningWrite = false; bool InterveningEffects = false; bool InterveningStackPointer = false; - query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects, + query(*I, InterveningRead, InterveningWrite, InterveningEffects, InterveningStackPointer); if (Effects && InterveningEffects) return false; @@ -808,7 +807,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); - AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); auto &MDT = getAnalysis<MachineDominatorTree>(); auto &LIS = getAnalysis<LiveIntervals>(); @@ -872,8 +870,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // supports intra-block moves) and it's MachineSink's job to catch all // the sinking opportunities anyway. 
bool SameBlock = DefI->getParent() == &MBB; - bool CanMove = SameBlock && - isSafeToMove(Def, &Use, Insert, AA, MFI, MRI) && + bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) && !TreeWalker.isOnStack(Reg); if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) { Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI); @@ -883,7 +880,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // TODO: Encode this properly as a stackified value. if (MFI.isFrameBaseVirtual() && MFI.getFrameBaseVreg() == Reg) MFI.clearFrameBaseVreg(); - } else if (shouldRematerialize(*DefI, AA, TII)) { + } else if (shouldRematerialize(*DefI, TII)) { Insert = rematerializeCheapDef(Reg, Use, *DefI, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86.td b/contrib/llvm-project/llvm/lib/Target/X86/X86.td index a859176220c7..fa0a6bd415dc 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86.td @@ -1277,7 +1277,7 @@ class ProcModel<string Name, SchedMachineModel Model, // enabled. It has no effect on code generation. // NOTE: As a default tuning, "generic" aims to produce code optimized for the // most common X86 processors. The tunings might be changed over time. It is -// recommended to use "x86-64" in lit tests for consistency. +// recommended to use "tune-cpu"="x86-64" in function attribute for consistency. 
def : ProcModel<"generic", SandyBridgeModel, [FeatureX87, FeatureCX8, FeatureX86_64], [TuningSlow3OpsLEA, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp index 16bff201dd03..db6923416177 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, switch (MI->getOpcode()) { case X86::MOV8rm: - // Only replace 8 bit loads with the zero extending versions if - // in an inner most loop and not optimizing for size. This takes - // an extra byte to encode, and provides limited performance upside. - if (MachineLoop *ML = MLI->getLoopFor(&MBB)) - if (ML->begin() == ML->end() && !OptForSize) - return tryReplaceLoad(X86::MOVZX32rm8, MI); + // Replace 8-bit loads with the zero-extending version if not optimizing + // for size. The extending op is cheaper across a wide range of uarch and + // it avoids a potentially expensive partial register stall. It takes an + // extra byte to encode, however, so don't do this when optimizing for size. 
+ if (!OptForSize) + return tryReplaceLoad(X86::MOVZX32rm8, MI); break; case X86::MOV16rm: diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 12af6087cb47..5a4533c4bac4 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -555,6 +555,39 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + auto setF16Action = [&] (MVT VT, LegalizeAction Action) { + setOperationAction(ISD::FABS, VT, Action); + setOperationAction(ISD::FNEG, VT, Action); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::FREM, VT, Action); + setOperationAction(ISD::FMA, VT, Action); + setOperationAction(ISD::FMINNUM, VT, Action); + setOperationAction(ISD::FMAXNUM, VT, Action); + setOperationAction(ISD::FMINIMUM, VT, Action); + setOperationAction(ISD::FMAXIMUM, VT, Action); + setOperationAction(ISD::FSIN, VT, Action); + setOperationAction(ISD::FCOS, VT, Action); + setOperationAction(ISD::FSINCOS, VT, Action); + setOperationAction(ISD::FSQRT, VT, Action); + setOperationAction(ISD::FPOW, VT, Action); + setOperationAction(ISD::FLOG, VT, Action); + setOperationAction(ISD::FLOG2, VT, Action); + setOperationAction(ISD::FLOG10, VT, Action); + setOperationAction(ISD::FEXP, VT, Action); + setOperationAction(ISD::FEXP2, VT, Action); + setOperationAction(ISD::FCEIL, VT, Action); + setOperationAction(ISD::FFLOOR, VT, Action); + setOperationAction(ISD::FNEARBYINT, VT, Action); + setOperationAction(ISD::FRINT, VT, Action); + setOperationAction(ISD::BR_CC, VT, Action); + setOperationAction(ISD::SETCC, VT, Action); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SELECT_CC, VT, Action); + setOperationAction(ISD::FROUND, VT, Action); + setOperationAction(ISD::FROUNDEVEN, VT, Action); + setOperationAction(ISD::FTRUNC, 
VT, Action); + }; + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { // f16, f32 and f64 use SSE. // Set up the FP register classes. @@ -592,40 +625,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Half type will be promoted by default. - setOperationAction(ISD::FABS, MVT::f16, Promote); - setOperationAction(ISD::FNEG, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); + setF16Action(MVT::f16, Promote); setOperationAction(ISD::FADD, MVT::f16, Promote); setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FMA, MVT::f16, Promote); - setOperationAction(ISD::FMINNUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); - setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); - setOperationAction(ISD::FSIN, MVT::f16, Promote); - setOperationAction(ISD::FCOS, MVT::f16, Promote); - setOperationAction(ISD::FSINCOS, MVT::f16, Promote); - setOperationAction(ISD::FSQRT, MVT::f16, Promote); - setOperationAction(ISD::FPOW, MVT::f16, Promote); - setOperationAction(ISD::FLOG, MVT::f16, Promote); - setOperationAction(ISD::FLOG2, MVT::f16, Promote); - setOperationAction(ISD::FLOG10, MVT::f16, Promote); - setOperationAction(ISD::FEXP, MVT::f16, Promote); - setOperationAction(ISD::FEXP2, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::BR_CC, MVT::f16, Promote); - setOperationAction(ISD::SETCC, MVT::f16, Promote); - setOperationAction(ISD::SELECT, MVT::f16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, 
Promote); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); @@ -1003,6 +1007,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : &X86::VR128RegClass); addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); + addRegisterClass(MVT::v8f16, Subtarget.hasVLX() ? &X86::VR128XRegClass + : &X86::VR128RegClass); addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass); addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass @@ -1084,7 +1090,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } - for (auto VT : { MVT::v2f64, MVT::v2i64 }) { + for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -1095,19 +1101,25 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } + setF16Action(MVT::v8f16, Expand); + setOperationAction(ISD::FADD, MVT::v8f16, Expand); + setOperationAction(ISD::FSUB, MVT::v8f16, Expand); + setOperationAction(ISD::FMUL, MVT::v8f16, Expand); + setOperationAction(ISD::FDIV, MVT::v8f16, Expand); // Custom lower v2i64 and v2f64 selects. 
setOperationAction(ISD::SELECT, MVT::v2f64, Custom); setOperationAction(ISD::SELECT, MVT::v2i64, Custom); setOperationAction(ISD::SELECT, MVT::v4i32, Custom); setOperationAction(ISD::SELECT, MVT::v8i16, Custom); + setOperationAction(ISD::SELECT, MVT::v8f16, Custom); setOperationAction(ISD::SELECT, MVT::v16i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Custom); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom); // Custom legalize these to avoid over promotion or custom promotion. @@ -1118,8 +1130,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); } - setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom); @@ -1304,6 +1316,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : &X86::VR256RegClass); addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); + addRegisterClass(MVT::v16f16, Subtarget.hasVLX() ? &X86::VR256XRegClass + : &X86::VR256RegClass); addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass); addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? 
&X86::VR256XRegClass @@ -1340,12 +1354,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); @@ -1356,7 +1372,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal); @@ -1386,6 +1401,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8i32, Custom); setOperationAction(ISD::SELECT, MVT::v16i16, Custom); + setOperationAction(ISD::SELECT, MVT::v16f16, Custom); setOperationAction(ISD::SELECT, MVT::v32i8, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); @@ -1507,7 +1523,7 @@ 
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Custom lower several nodes for 256-bit types. for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) { + MVT::v16f16, MVT::v8f32, MVT::v4f64 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -1518,6 +1534,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); } + setF16Action(MVT::v16f16, Expand); + setOperationAction(ISD::FADD, MVT::v16f16, Expand); + setOperationAction(ISD::FSUB, MVT::v16f16, Expand); + setOperationAction(ISD::FMUL, MVT::v16f16, Expand); + setOperationAction(ISD::FDIV, MVT::v16f16, Expand); if (HasInt256) { setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); @@ -1532,11 +1553,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) { - setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); - setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); + if (!Subtarget.useSoftFloat() && !Subtarget.hasFP16() && + Subtarget.hasF16C()) { + for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) { + setOperationAction(ISD::FP_ROUND, VT, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); + } + for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) { + setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); + } + for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { + setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32); + setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); + } + + setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + 
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); } // This block controls legalization of the mask vector sizes that are @@ -1619,6 +1652,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v32i16, &X86::VR512RegClass); + addRegisterClass(MVT::v32f16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { @@ -1645,14 +1679,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); } - setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Custom); setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FADD, 
MVT::v8f64, Legal); @@ -1664,7 +1700,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); @@ -1799,15 +1834,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSHR, MVT::v16i32, Custom); if (Subtarget.hasDQI()) { - setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); - + for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) + setOperationAction(Opc, MVT::v8i64, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Legal); } @@ -1831,7 +1861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, - MVT::v16f32, MVT::v8f64 }) { + MVT::v32f16, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Custom); @@ -1842,6 +1872,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); } + setF16Action(MVT::v32f16, Expand); + setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { + setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); + setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32); + } for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::MLOAD, VT, Legal); @@ -1881,23 +1920,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // These operations are handled on non-VLX by artificially widening in // isel patterns. - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, - Subtarget.hasVLX() ? Legal : Custom); if (Subtarget.hasDQI()) { // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. 
@@ -1934,25 +1959,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSCATTER, VT, Custom); if (Subtarget.hasDQI()) { - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::UINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_SINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_UINT_TO_FP, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_SINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::FP_TO_UINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, - Subtarget.hasVLX() ? Legal : Custom); - setOperationAction(ISD::MUL, VT, Legal); + for (auto Opc : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP, + ISD::STRICT_UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT, + ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT}) { + setOperationAction(Opc, MVT::v2i64, Custom); + setOperationAction(Opc, MVT::v4i64, Custom); } + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i64, Legal); } if (Subtarget.hasCDI()) { @@ -2052,7 +2066,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // AVX512_FP16 scalar operations setGroup(MVT::f16); - addRegisterClass(MVT::f16, &X86::FR16XRegClass); setOperationAction(ISD::FREM, MVT::f16, Promote); setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); @@ -2066,6 +2079,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal); setOperationAction(ISD::FP_ROUND, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); + 
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand); @@ -2073,14 +2087,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.useAVX512Regs()) { setGroup(MVT::v32f16); - addRegisterClass(MVT::v32f16, &X86::VR512RegClass); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal); + setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom); @@ -2112,8 +2129,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasVLX()) { - addRegisterClass(MVT::v8f16, &X86::VR128XRegClass); - addRegisterClass(MVT::v16f16, &X86::VR256XRegClass); setGroup(MVT::v8f16); setGroup(MVT::v16f16); @@ -2132,8 +2147,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + 
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom); @@ -2347,7 +2366,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::FP16_TO_FP, ISD::FP_EXTEND, ISD::STRICT_FP_EXTEND, - ISD::FP_ROUND}); + ISD::FP_ROUND, + ISD::STRICT_FP_ROUND}); computeRegisterProperties(Subtarget.getRegisterInfo()); @@ -2404,6 +2424,10 @@ X86TargetLowering::getPreferredVectorAction(MVT VT) const { return TypeSplitVector; if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && + !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16) + return TypeSplitVector; + + if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && VT.getVectorElementType() != MVT::i1) return TypeWidenVector; @@ -2447,22 +2471,21 @@ handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && - Subtarget.hasAVX512()) { - unsigned NumElts = VT.getVectorNumElements(); + if (VT.isVector()) { + if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { + unsigned NumElts = VT.getVectorNumElements(); - MVT RegisterVT; - unsigned NumRegisters; - std::tie(RegisterVT, NumRegisters) = - handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); - if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) - return RegisterVT; - } + MVT RegisterVT; + unsigned NumRegisters; + std::tie(RegisterVT, NumRegisters) = + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + return RegisterVT; + } - // v3f16 will be widen to v4f16. But we don't assign register class for v4f16. - // So its default register type is f16. We override the type to v8f16 here. 
- if (VT == MVT::v3f16 && Subtarget.hasFP16()) - return MVT::v8f16; + if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) + return MVT::v8f16; + } // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled. if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() && @@ -2475,22 +2498,21 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && - Subtarget.hasAVX512()) { - unsigned NumElts = VT.getVectorNumElements(); + if (VT.isVector()) { + if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) { + unsigned NumElts = VT.getVectorNumElements(); - MVT RegisterVT; - unsigned NumRegisters; - std::tie(RegisterVT, NumRegisters) = - handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); - if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) - return NumRegisters; - } + MVT RegisterVT; + unsigned NumRegisters; + std::tie(RegisterVT, NumRegisters) = + handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); + if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + return NumRegisters; + } - // v3f16 will be widen to v4f16. But we don't assign register class for v4f16. - // So its default register number is 3. We override the number to 1 here. - if (VT == MVT::v3f16 && Subtarget.hasFP16()) - return 1; + if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8) + return 1; + } // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if // x87 is disabled. @@ -9646,13 +9668,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); - // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. + // Splat f16, f32, i32, v4f64, v4i64 in all cases with AVX2. 
// For size optimization, also splat v2f64 and v2i64, and for size opt // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (ScalarSize == 64 && (IsGE256 || Subtarget.hasVLX())) || - (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) || + CVT == MVT::f16 || (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) @@ -14129,6 +14151,16 @@ static bool isShuffleFoldableLoad(SDValue V) { ISD::isNON_EXTLoad(peekThroughOneUseBitcasts(V).getNode()); } +template<typename T> +static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { + return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16(); +} + +template<typename T> +bool X86TargetLowering::isSoftFP16(T VT) const { + return ::isSoftFP16(VT, Subtarget); +} + /// Try to lower insertion of a single element into a zero vector. /// /// This is a common pattern that we have especially efficient patterns to lower @@ -14140,6 +14172,9 @@ static SDValue lowerShuffleAsElementInsertion( MVT ExtVT = VT; MVT EltVT = VT.getVectorElementType(); + if (isSoftFP16(EltVT, Subtarget)) + return SDValue(); + int V2Index = find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) - Mask.begin(); @@ -19444,6 +19479,15 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + if (isSoftFP16(VT)) { + MVT NVT = VT.changeVectorElementTypeToInteger(); + return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond, + DAG.getBitcast(NVT, LHS), + DAG.getBitcast(NVT, RHS))); + } + // A vselect where all conditions and data are constants can be optimized into // a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR(). 
if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) && @@ -19467,8 +19511,6 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget.hasSSE41()) return SDValue(); - SDLoc dl(Op); - MVT VT = Op.getSimpleValueType(); unsigned EltSize = VT.getScalarSizeInBits(); unsigned NumElts = VT.getVectorNumElements(); @@ -20856,16 +20898,6 @@ static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG, return Cvt; } -template<typename T> -static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { - return VT == MVT::f16 && !Subtarget.hasFP16(); -} - -template<typename T> -bool X86TargetLowering::isSoftFP16(T VT) const { - return ::isSoftFP16(VT, Subtarget); -} - static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) { bool IsStrict = Op->isStrictFPOpcode(); SDValue Src = Op.getOperand(IsStrict ? 1 : 0); @@ -20885,6 +20917,26 @@ static SDValue promoteXINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NVT, Src), Rnd); } +static bool isLegalConversion(MVT VT, bool IsSigned, + const X86Subtarget &Subtarget) { + if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned) + return true; + if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned) + return true; + if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return true; + if (Subtarget.useAVX512Regs()) { + if (VT == MVT::v16i32) + return true; + if (VT == MVT::v8i64 && Subtarget.hasDQI()) + return true; + } + if (Subtarget.hasDQI() && Subtarget.hasVLX() && + (VT == MVT::v2i64 || VT == MVT::v4i64)) + return true; + return false; +} + SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); @@ -20897,6 +20949,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, if (isSoftFP16(VT)) return promoteXINT_TO_FP(Op, DAG); + else if (isLegalConversion(SrcVT, true, Subtarget)) + return Op; if (Subtarget.isTargetWin64() && SrcVT == MVT::i128) return 
LowerWin64_INT128_TO_FP(Op, DAG); @@ -21400,6 +21454,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (isSoftFP16(DstVT)) return promoteXINT_TO_FP(Op, DAG); + else if (isLegalConversion(SrcVT, false, Subtarget)) + return Op; if (DstVT.isVector()) return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); @@ -22229,6 +22285,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { {NVT, MVT::Other}, {Chain, Src})}); return DAG.getNode(Op.getOpcode(), dl, VT, DAG.getNode(ISD::FP_EXTEND, dl, NVT, Src)); + } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) { + return Op; } if (VT.isVector()) { @@ -22826,7 +22884,7 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return Op; if (SVT.getVectorElementType() == MVT::f16) { - assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!"); + assert(Subtarget.hasF16C() && "Unexpected features!"); if (SVT == MVT::v2f16) In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In, DAG.getUNDEF(MVT::v2f16)); @@ -22836,6 +22894,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, {Op->getOperand(0), Res}); return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); + } else if (VT == MVT::v4f64 || VT == MVT::v8f64) { + return Op; } assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); @@ -22854,34 +22914,19 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); SDValue In = Op.getOperand(IsStrict ? 1 : 0); - SDValue Op2 = Op.getOperand(IsStrict ? 
2 : 1); MVT VT = Op.getSimpleValueType(); MVT SVT = In.getSimpleValueType(); if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) return SDValue(); - if (VT == MVT::f16) { - if (Subtarget.hasFP16()) - return Op; - - if (SVT != MVT::f32) { - if (IsStrict) - return DAG.getNode( - ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, - {Chain, - DAG.getNode(ISD::STRICT_FP_ROUND, DL, {MVT::f32, MVT::Other}, - {Chain, In, Op2}), - Op2}); - - return DAG.getNode(ISD::FP_ROUND, DL, VT, - DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, In, Op2), - Op2); - } - - if (!Subtarget.hasF16C()) + if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) { + if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32) return SDValue(); + if (VT.isVector()) + return Op; + SDValue Res; SDValue Rnd = DAG.getTargetConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, DL, MVT::i32); @@ -24176,10 +24221,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); if (isFP) { -#ifndef NDEBUG MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); -#endif + if (isSoftFP16(EltVT, Subtarget)) + return SDValue(); bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); @@ -24741,6 +24786,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); + if (isSoftFP16(Op0.getValueType())) + return SDValue(); + // Handle f128 first, since one possible outcome is a normal integer // comparison which gets handled by emitFlagsForSetcc. 
if (Op0.getValueType() == MVT::f128) { @@ -24931,10 +24979,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op1.getSimpleValueType(); SDValue CC; - if (isSoftFP16(VT)) - return DAG.getBitcast(MVT::f16, DAG.getNode(ISD::SELECT, DL, MVT::i16, Cond, - DAG.getBitcast(MVT::i16, Op1), - DAG.getBitcast(MVT::i16, Op2))); + if (isSoftFP16(VT)) { + MVT NVT = VT.changeTypeToInteger(); + return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond, + DAG.getBitcast(NVT, Op1), + DAG.getBitcast(NVT, Op2))); + } // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops // are available or VBLENDV if AVX is available. @@ -27268,27 +27318,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } - case Intrinsic::swift_async_context_addr: { - auto &MF = DAG.getMachineFunction(); - auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); - if (Subtarget.is64Bit()) { - MF.getFrameInfo().setFrameAddressIsTaken(true); - X86FI->setHasSwiftAsyncContext(true); - return SDValue( - DAG.getMachineNode( - X86::SUB64ri8, dl, MVT::i64, - DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64), - DAG.getTargetConstant(8, dl, MVT::i32)), - 0); - } else { - // 32-bit so no special extended frame, create or reuse an existing stack - // slot. 
- if (!X86FI->getSwiftAsyncContextFrameIdx()) - X86FI->setSwiftAsyncContextFrameIdx( - MF.getFrameInfo().CreateStackObject(4, Align(4), false)); - return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); - } - } case Intrinsic::x86_avx512_vp2intersect_q_512: case Intrinsic::x86_avx512_vp2intersect_q_256: case Intrinsic::x86_avx512_vp2intersect_q_128: @@ -27668,6 +27697,37 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { switch (IntNo) { + + case Intrinsic::swift_async_context_addr: { + SDLoc dl(Op); + auto &MF = DAG.getMachineFunction(); + auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); + if (Subtarget.is64Bit()) { + MF.getFrameInfo().setFrameAddressIsTaken(true); + X86FI->setHasSwiftAsyncContext(true); + SDValue Chain = Op->getOperand(0); + SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64); + SDValue Result = + SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP, + DAG.getTargetConstant(8, dl, MVT::i32)), + 0); + // Return { result, chain }. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + CopyRBP.getValue(1)); + } else { + // 32-bit so no special extended frame, create or reuse an existing + // stack slot. + if (!X86FI->getSwiftAsyncContextFrameIdx()) + X86FI->setSwiftAsyncContextFrameIdx( + MF.getFrameInfo().CreateStackObject(4, Align(4), false)); + SDValue Result = + DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); + // Return { result, chain }. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + Op->getOperand(0)); + } + } + case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); case llvm::Intrinsic::x86_seh_ehguard: @@ -32901,20 +32961,39 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: { bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); SDValue Src = N->getOperand(IsStrict ? 1 : 0); + SDValue Rnd = N->getOperand(IsStrict ? 2 : 1); + EVT SrcVT = Src.getValueType(); EVT VT = N->getValueType(0); - EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32; + SDValue V; if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) { SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32) : DAG.getUNDEF(MVT::v2f32); Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext); } + if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) { + assert(Subtarget.hasF16C() && "Cannot widen f16 without F16C"); + if (SrcVT.getVectorElementType() != MVT::f32) + return; + + if (IsStrict) + V = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other}, + {Chain, Src, Rnd}); + else + V = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Src, Rnd); + + Results.push_back(DAG.getBitcast(MVT::v8f16, V)); + if (IsStrict) + Results.push_back(V.getValue(1)); + return; + } if (!isTypeLegal(Src.getValueType())) return; - SDValue V; + EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32; if (IsStrict) V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other}, - {N->getOperand(0), Src}); + {Chain, Src}); else V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src); Results.push_back(V); @@ -37342,6 +37421,7 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, bool IsUnary) { unsigned NumMaskElts = Mask.size(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); + unsigned SizeInBits = MaskVT.getSizeInBits(); if (MaskVT.is128BitVector()) { if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) && @@ -37409,7 +37489,10 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // Attempt to match against a OR if we're performing a blend shuffle and the // non-blended source element is zero in each case. 
- if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && + // TODO: Handle cases where V1/V2 sizes doesn't match SizeInBits. + if (SizeInBits == V1.getValueSizeInBits() && + SizeInBits == V2.getValueSizeInBits() && + (EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { bool IsBlend = true; unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); @@ -39652,11 +39735,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SmallVector<int, 4> Mask; unsigned Opcode = N.getOpcode(); - // FIXME: Remove this after we support vector FP16 - if (isSoftFP16(peekThroughBitcasts(N.getOperand(0)).getSimpleValueType(), - Subtarget)) - return SDValue(); - if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) return R; @@ -40947,12 +41025,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( EltBits)) { OpBits.clearAllBits(); OpElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if (DemandedElts[I] && ((Invert && !EltBits[I].isAllOnes()) || - (!Invert && !EltBits[I].isZero()))) { + for (int I = 0; I != NumElts; ++I) { + if (!DemandedElts[I]) + continue; + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. + OpBits.setAllBits(); + OpElts.setBit(I); + } else if ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero())) { OpBits |= Invert ? 
~EltBits[I] : EltBits[I]; OpElts.setBit(I); } + } } return std::make_pair(OpBits, OpElts); }; @@ -44715,7 +44801,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } // Early exit check - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget)) return SDValue(); if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) @@ -47798,11 +47884,17 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, EltBits)) { DemandedBits.clearAllBits(); DemandedElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if (!EltBits[I].isZero()) { + for (int I = 0; I != NumElts; ++I) { + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. + DemandedBits.setAllBits(); + DemandedElts.setBit(I); + } else if (!EltBits[I].isZero()) { DemandedBits |= EltBits[I]; DemandedElts.setBit(I); } + } } return std::make_pair(DemandedBits, DemandedElts); }; @@ -51042,6 +51134,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); MVT VT = N->getSimpleValueType(0); + int NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); // ANDNP(undef, x) -> 0 // ANDNP(x, undef) -> 0 @@ -51060,6 +51154,19 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, if (SDValue Not = IsNOT(N0, DAG)) return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1); + // Constant Folding + APInt Undefs0, Undefs1; + SmallVector<APInt> EltBits0, EltBits1; + if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0) && + getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) { + SDLoc DL(N); + SmallVector<APInt> ResultBits; + for (int I = 0; I != NumElts; ++I) + ResultBits.push_back(~EltBits0[I] & EltBits1[I]); + APInt ResultUndefs = APInt::getZero(NumElts); + return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL); + } + // TODO: Constant fold NOT(N0) to 
allow us to use AND. // TODO: Do this in IsNOT with suitable oneuse checks? @@ -51074,20 +51181,24 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, auto GetDemandedMasks = [&](SDValue Op, bool Invert = false) { APInt UndefElts; SmallVector<APInt> EltBits; - int NumElts = VT.getVectorNumElements(); - int EltSizeInBits = VT.getScalarSizeInBits(); APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); APInt DemandedElts = APInt::getAllOnes(NumElts); if (getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits)) { DemandedBits.clearAllBits(); DemandedElts.clearAllBits(); - for (int I = 0; I != NumElts; ++I) - if ((Invert && !EltBits[I].isAllOnes()) || - (!Invert && !EltBits[I].isZero())) { + for (int I = 0; I != NumElts; ++I) { + if (UndefElts[I]) { + // We can't assume an undef src element gives an undef dst - the + // other src might be zero. + DemandedBits.setAllBits(); + DemandedElts.setBit(I); + } else if ((Invert && !EltBits[I].isAllOnes()) || + (!Invert && !EltBits[I].isZero())) { DemandedBits |= Invert ? ~EltBits[I] : EltBits[I]; DemandedElts.setBit(I); } + } } return std::make_pair(DemandedBits, DemandedElts); }; @@ -54714,8 +54825,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, if (Subtarget.hasFP16()) return SDValue(); + bool IsStrict = N->isStrictFPOpcode(); EVT VT = N->getValueType(0); - SDValue Src = N->getOperand(0); + SDValue Src = N->getOperand(IsStrict ? 1 : 0); EVT SrcVT = Src.getValueType(); if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || @@ -54736,8 +54848,15 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, // Destination is v8i16 with at least 8 elements. 
EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, std::max(8U, NumElts)); - SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, - DAG.getTargetConstant(4, dl, MVT::i32)); + SDValue Cvt, Chain; + SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32); + if (IsStrict) { + Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other}, + {N->getOperand(0), Src, Rnd}); + Chain = Cvt.getValue(1); + } else { + Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, Rnd); + } // Extract down to real number of elements. if (NumElts < 8) { @@ -54746,7 +54865,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, DAG.getIntPtrConstant(0, dl)); } - return DAG.getBitcast(VT, Cvt); + Cvt = DAG.getBitcast(VT, Cvt); + + if (IsStrict) + return DAG.getMergeValues({Cvt, Chain}, dl); + + return Cvt; } static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) { @@ -54954,6 +55078,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget); + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget); case X86ISD::VBROADCAST_LOAD: case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td index 48da7b3ac882..c105bde78ad1 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3769,12 +3769,16 @@ let Predicates = [HasAVX512] in { (VMOVDQA64Zrm addr:$src)>; def : Pat<(alignedloadv32i16 addr:$src), (VMOVDQA64Zrm addr:$src)>; + def : Pat<(alignedloadv32f16 addr:$src), + (VMOVAPSZrm addr:$src)>; def : Pat<(alignedloadv64i8 addr:$src), (VMOVDQA64Zrm addr:$src)>; def : Pat<(loadv16i32 addr:$src), (VMOVDQU64Zrm addr:$src)>; def : Pat<(loadv32i16 
addr:$src), (VMOVDQU64Zrm addr:$src)>; + def : Pat<(loadv32f16 addr:$src), + (VMOVUPSZrm addr:$src)>; def : Pat<(loadv64i8 addr:$src), (VMOVDQU64Zrm addr:$src)>; @@ -3783,12 +3787,16 @@ let Predicates = [HasAVX512] in { (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; + def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), + (VMOVAPSZmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v16i32 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v32i16 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; + def : Pat<(store (v32f16 VR512:$src), addr:$dst), + (VMOVUPSZmr addr:$dst, VR512:$src)>; def : Pat<(store (v64i8 VR512:$src), addr:$dst), (VMOVDQU64Zmr addr:$dst, VR512:$src)>; } @@ -3799,12 +3807,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z128rm addr:$src)>; def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQA64Z128rm addr:$src)>; + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVAPSZ128rm addr:$src)>; def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQA64Z128rm addr:$src)>; def : Pat<(loadv4i32 addr:$src), (VMOVDQU64Z128rm addr:$src)>; def : Pat<(loadv8i16 addr:$src), (VMOVDQU64Z128rm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVUPSZ128rm addr:$src)>; def : Pat<(loadv16i8 addr:$src), (VMOVDQU64Z128rm addr:$src)>; @@ -3813,12 +3825,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; + def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), + (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v4i32 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v8i16 VR128X:$src), 
addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; + def : Pat<(store (v8f16 VR128X:$src), addr:$dst), + (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v16i8 VR128X:$src), addr:$dst), (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; @@ -3827,12 +3843,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z256rm addr:$src)>; def : Pat<(alignedloadv16i16 addr:$src), (VMOVDQA64Z256rm addr:$src)>; + def : Pat<(alignedloadv16f16 addr:$src), + (VMOVAPSZ256rm addr:$src)>; def : Pat<(alignedloadv32i8 addr:$src), (VMOVDQA64Z256rm addr:$src)>; def : Pat<(loadv8i32 addr:$src), (VMOVDQU64Z256rm addr:$src)>; def : Pat<(loadv16i16 addr:$src), (VMOVDQU64Z256rm addr:$src)>; + def : Pat<(loadv16f16 addr:$src), + (VMOVUPSZ256rm addr:$src)>; def : Pat<(loadv32i8 addr:$src), (VMOVDQU64Z256rm addr:$src)>; @@ -3841,12 +3861,16 @@ let Predicates = [HasVLX] in { (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; + def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), + (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v8i32 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v16i16 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; + def : Pat<(store (v16f16 VR256X:$src), addr:$dst), + (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v32i8 VR256X:$src), addr:$dst), (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; } @@ -3855,16 +3879,12 @@ let Predicates = [HasBWI] in { (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; - def : Pat<(v32f16 (alignedloadv32f16 addr:$src)), - (VMOVAPSZrm addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 
VR512:$src0))), (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; - def : Pat<(v32f16 (loadv32f16 addr:$src)), - (VMOVUPSZrm addr:$src)>; def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))), (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; @@ -3878,10 +3898,6 @@ let Predicates = [HasBWI] in { def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)), (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; - def : Pat<(store (v32f16 VR512:$src), addr:$dst), - (VMOVUPSZmr addr:$dst, VR512:$src)>; def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask), (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; } @@ -3890,16 +3906,12 @@ let Predicates = [HasBWI, HasVLX] in { (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)), (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>; - def : Pat<(v16f16 (alignedloadv16f16 addr:$src)), - (VMOVAPSZ256rm addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))), (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)), (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; - def : Pat<(v16f16 (loadv16f16 addr:$src)), - (VMOVUPSZ256rm addr:$src)>; def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))), (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; @@ -3913,10 +3925,6 @@ let Predicates = [HasBWI, HasVLX] in { def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)), 
(VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), - (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; - def : Pat<(store (v16f16 VR256X:$src), addr:$dst), - (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask), (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>; @@ -3924,16 +3932,12 @@ let Predicates = [HasBWI, HasVLX] in { (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>; - def : Pat<(v8f16 (alignedloadv8f16 addr:$src)), - (VMOVAPSZ128rm addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))), (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; - def : Pat<(v8f16 (loadv8f16 addr:$src)), - (VMOVUPSZ128rm addr:$src)>; def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))), (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; @@ -3947,10 +3951,6 @@ let Predicates = [HasBWI, HasVLX] in { def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)), (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; - def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), - (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; - def : Pat<(store (v8f16 VR128X:$src), addr:$dst), - (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask), (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp index ec32ac2acad1..74ef831e1658 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp 
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -742,8 +742,8 @@ static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const { +bool X86InstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI) const { switch (MI.getOpcode()) { default: // This function should only be called for opcodes with the ReMaterializable @@ -869,7 +869,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, MI.getOperand(1 + X86::AddrScaleAmt).isImm() && MI.getOperand(1 + X86::AddrIndexReg).isReg() && MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 && - MI.isDereferenceableInvariantLoad(AA)) { + MI.isDereferenceableInvariantLoad()) { Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; @@ -3892,6 +3892,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && + "Load size exceeds stack slot"); if (RC->getID() == X86::TILERegClassID) { unsigned Opc = X86::TILELOADD; // tileloadd (%sp, %idx), %tmm @@ -3913,8 +3917,6 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx); } else { - const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) || diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h index 
4943d2152fd2..98da00c39bdb 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.h @@ -240,8 +240,7 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override; - bool isReallyTriviallyReMaterializable(const MachineInstr &MI, - AAResults *AA) const override; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td index 06cb280e860a..c5557bd5df4e 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrSSE.td @@ -140,6 +140,7 @@ def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", let Predicates = [NoAVX512] in { def : Pat<(v16i8 immAllZerosV), (V_SET0)>; def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v8f16 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; def : Pat<(v2i64 immAllZerosV), (V_SET0)>; def : Pat<(v2f64 immAllZerosV), (V_SET0)>; @@ -159,6 +160,7 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", let Predicates = [NoAVX512] in { def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; +def : Pat<(v16f16 immAllZerosV), (AVX_SET0)>; def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>; def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; @@ -572,6 +574,23 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v32i8 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>; + + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), 
addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedloadv16f16 addr:$src), + (VMOVAPSYrm addr:$src)>; + def : Pat<(loadv16f16 addr:$src), + (VMOVUPSYrm addr:$src)>; + def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst), + (VMOVAPSYmr addr:$dst, VR256:$src)>; + def : Pat<(store (v16f16 VR256:$src), addr:$dst), + (VMOVUPSYmr addr:$dst, VR256:$src)>; } // Use movaps / movups for SSE integer load / store (one byte shorter). @@ -613,6 +632,17 @@ let Predicates = [UseSSE1] in { (MOVUPSmr addr:$dst, VR128:$src)>; } +let Predicates = [UseSSE2] in { + def : Pat<(alignedloadv8f16 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// @@ -3136,6 +3166,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQYmr addr:$dst, VR256:$src)>; def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v16f16 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), (VMOVNTDQYmr addr:$dst, VR256:$src)>; @@ -3143,6 +3175,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), (VMOVNTDQmr addr:$dst, 
VR128:$src)>; } @@ -3152,6 +3186,8 @@ let Predicates = [UseSSE2] in { (MOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8f16 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>; } @@ -3374,12 +3410,16 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDQArm addr:$src)>; def : Pat<(alignedloadv8i16 addr:$src), (VMOVDQArm addr:$src)>; + def : Pat<(alignedloadv8f16 addr:$src), + (VMOVDQArm addr:$src)>; def : Pat<(alignedloadv16i8 addr:$src), (VMOVDQArm addr:$src)>; def : Pat<(loadv4i32 addr:$src), (VMOVDQUrm addr:$src)>; def : Pat<(loadv8i16 addr:$src), (VMOVDQUrm addr:$src)>; + def : Pat<(loadv8f16 addr:$src), + (VMOVDQUrm addr:$src)>; def : Pat<(loadv16i8 addr:$src), (VMOVDQUrm addr:$src)>; @@ -3387,12 +3427,16 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), (VMOVDQAmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst), + (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), (VMOVDQAmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8f16 VR128:$src), addr:$dst), + (VMOVDQUmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (VMOVDQUmr addr:$dst, VR128:$src)>; } @@ -6431,6 +6475,8 @@ let Predicates = [HasAVX2, NoVLX] in { (VMOVNTDQAYrm addr:$src)>; def : Pat<(v16i16 (alignednontemporalload addr:$src)), (VMOVNTDQAYrm addr:$src)>; + def : Pat<(v16f16 (alignednontemporalload addr:$src)), + (VMOVNTDQAYrm addr:$src)>; def : Pat<(v32i8 (alignednontemporalload addr:$src)), (VMOVNTDQAYrm addr:$src)>; } 
@@ -6446,6 +6492,8 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVNTDQArm addr:$src)>; def : Pat<(v8i16 (alignednontemporalload addr:$src)), (VMOVNTDQArm addr:$src)>; + def : Pat<(v8f16 (alignednontemporalload addr:$src)), + (VMOVNTDQArm addr:$src)>; def : Pat<(v16i8 (alignednontemporalload addr:$src)), (VMOVNTDQArm addr:$src)>; } @@ -6461,6 +6509,8 @@ let Predicates = [UseSSE41] in { (MOVNTDQArm addr:$src)>; def : Pat<(v8i16 (alignednontemporalload addr:$src)), (MOVNTDQArm addr:$src)>; + def : Pat<(v8f16 (alignednontemporalload addr:$src)), + (MOVNTDQArm addr:$src)>; def : Pat<(v16i8 (alignednontemporalload addr:$src)), (MOVNTDQArm addr:$src)>; } @@ -7050,6 +7100,8 @@ def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; +def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)), + (VBROADCASTF128 addr:$src)>; def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), (VBROADCASTF128 addr:$src)>; } @@ -7095,6 +7147,7 @@ let Predicates = [HasAVX1Only] in { defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>; defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>; defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2F128", v16f16, loadv16f16>; defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>; } @@ -7150,6 +7203,8 @@ let Predicates = [HasAVX1Only] in { defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8f16, v16f16, loadv8f16, loadv16f16>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>; defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>; } @@ -7189,6 
+7244,8 @@ let Predicates = [HasAVX1Only] in { defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>; defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>; defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>; + defm : vextract_lowering<"VEXTRACTF128", v16f16, v8f16>; + defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>; defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>; } @@ -7503,6 +7560,10 @@ def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)) (VBLENDPSYrri VR256:$src1, (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src2, sub_xmm), 0xf)>; +def : Pat<(insert_subvector (v16f16 VR256:$src1), (v8f16 VR128:$src2), (iPTR 0)), + (VBLENDPSYrri VR256:$src1, + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + VR128:$src2, sub_xmm), 0xf)>; def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)), (VBLENDPSYrri VR256:$src1, (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), @@ -7517,6 +7578,9 @@ def : Pat<(insert_subvector (loadv4i64 addr:$src2), (v2i64 VR128:$src1), (iPTR 0 def : Pat<(insert_subvector (loadv16i16 addr:$src2), (v8i16 VR128:$src1), (iPTR 0)), (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; +def : Pat<(insert_subvector (loadv16f16 addr:$src2), (v8f16 VR128:$src1), (iPTR 0)), + (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; def : Pat<(insert_subvector (loadv32i8 addr:$src2), (v16i8 VR128:$src1), (iPTR 0)), (VBLENDPSYrmi (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src1, sub_xmm), addr:$src2, 0xf0)>; @@ -7759,6 +7823,8 @@ let Predicates = [HasAVX2] in { defm : vperm2x128_lowering<"VPERM2I128", v4i64, loadv4i64>; defm : vperm2x128_lowering<"VPERM2I128", v8i32, loadv8i32>; defm : vperm2x128_lowering<"VPERM2I128", v16i16, loadv16i16>; + defm : vperm2x128_lowering<"VPERM2I128", v16f16, loadv16f16>; + defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; defm : vperm2x128_lowering<"VPERM2I128", v32i8, loadv32i8>; } @@ 
-7781,6 +7847,8 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8f16, v16f16, loadv8f16, loadv16f16>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; } @@ -7801,6 +7869,8 @@ let Predicates = [HasAVX2, NoVLX] in { defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>; defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>; defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>; + defm : vextract_lowering<"VEXTRACTI128", v16f16, v8f16>; + defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>; } diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp index f4e25e4194db..1de2a1725954 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -254,8 +254,12 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU; - StringRef TuneCPU = - TuneAttr.isValid() ? TuneAttr.getValueAsString() : (StringRef)CPU; + // "x86-64" is a default target setting for many front ends. In these cases, + // they actually request for "generic" tuning unless the "tune-cpu" was + // specified. + StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() + : CPU == "x86-64" ? "generic" + : (StringRef)CPU; StringRef FS = FSAttr.isValid() ? 
FSAttr.getValueAsString() : (StringRef)TargetFS; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index b36f8a3d06d0..b27aac9c4e93 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1297,29 +1297,6 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, LT.first = NumOfDests * NumOfShufflesPerDest; } - static const CostTblEntry AVX512FP16ShuffleTbl[] = { - {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw - {TTI::SK_Broadcast, MVT::v8f16, 1}, // vpbroadcastw - - {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw - {TTI::SK_Reverse, MVT::v16f16, 2}, // vpermw - {TTI::SK_Reverse, MVT::v8f16, 1}, // vpshufb - - {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw - {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // vpshufb - - {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // vpermt2w - {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} // vpermt2w - }; - - if (!ST->useSoftFloat() && ST->hasFP16()) - if (const auto *Entry = - CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; - static const CostTblEntry AVX512VBMIShuffleTbl[] = { {TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb {TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb @@ -1339,17 +1316,22 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, static const CostTblEntry AVX512BWShuffleTbl[] = { {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v32i16, 2}, // vpermw + {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw {TTI::SK_Reverse, MVT::v16i16, 2}, // vpermw {TTI::SK_Reverse, 
MVT::v64i8, 2}, // pshufb + vshufi64x2 {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, // vpermw + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, // extend to v32i16 {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, // vpermt2w + {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, // vpermt2w {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, // 6 * v32i8 + 1 @@ -1369,6 +1351,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v8i64, 1}, // vpbroadcastq {TTI::SK_Broadcast, MVT::v16i32, 1}, // vpbroadcastd {TTI::SK_Broadcast, MVT::v32i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v64i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v8f64, 1}, // vpermpd @@ -1376,6 +1359,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v8i64, 1}, // vpermq {TTI::SK_Reverse, MVT::v16i32, 1}, // vpermd {TTI::SK_Reverse, MVT::v32i16, 7}, // per mca + {TTI::SK_Reverse, MVT::v32f16, 7}, // per mca {TTI::SK_Reverse, MVT::v64i8, 7}, // per mca {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd @@ -1408,11 +1392,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // FIXME: This just applies the type legalization cost rules above // assuming these completely split. 
{TTI::SK_PermuteSingleSrc, MVT::v32i16, 14}, + {TTI::SK_PermuteSingleSrc, MVT::v32f16, 14}, {TTI::SK_PermuteSingleSrc, MVT::v64i8, 14}, {TTI::SK_PermuteTwoSrc, MVT::v32i16, 42}, + {TTI::SK_PermuteTwoSrc, MVT::v32f16, 42}, {TTI::SK_PermuteTwoSrc, MVT::v64i8, 42}, {TTI::SK_Select, MVT::v32i16, 1}, // vpternlogq + {TTI::SK_Select, MVT::v32f16, 1}, // vpternlogq {TTI::SK_Select, MVT::v64i8, 1}, // vpternlogq {TTI::SK_Select, MVT::v8f64, 1}, // vblendmpd {TTI::SK_Select, MVT::v16f32, 1}, // vblendmps @@ -1430,6 +1417,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v4i64, 1}, // vpbroadcastq {TTI::SK_Broadcast, MVT::v8i32, 1}, // vpbroadcastd {TTI::SK_Broadcast, MVT::v16i16, 1}, // vpbroadcastw + {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw {TTI::SK_Broadcast, MVT::v32i8, 1}, // vpbroadcastb {TTI::SK_Reverse, MVT::v4f64, 1}, // vpermpd @@ -1437,9 +1425,11 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v4i64, 1}, // vpermq {TTI::SK_Reverse, MVT::v8i32, 1}, // vpermd {TTI::SK_Reverse, MVT::v16i16, 2}, // vperm2i128 + pshufb + {TTI::SK_Reverse, MVT::v16f16, 2}, // vperm2i128 + pshufb {TTI::SK_Reverse, MVT::v32i8, 2}, // vperm2i128 + pshufb {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb + {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd @@ -1448,6 +1438,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, // vpermd {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, // vperm2i128 + 2*vpshufb // + vpblendvb + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 4}, // vperm2i128 + 2*vpshufb + // + vpblendvb {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, // vperm2i128 + 2*vpshufb // + vpblendvb @@ -1457,6 +1449,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, // 2*vpermd + vpblendd 
{TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, // 2*vperm2i128 + 4*vpshufb // + vpblendvb + {TTI::SK_PermuteTwoSrc, MVT::v16f16, 7}, // 2*vperm2i128 + 4*vpshufb + // + vpblendvb {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, // 2*vperm2i128 + 4*vpshufb // + vpblendvb }; @@ -1493,6 +1487,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v4i64, 2}, // vperm2f128 + vpermilpd {TTI::SK_Broadcast, MVT::v8i32, 2}, // vperm2f128 + vpermilps {TTI::SK_Broadcast, MVT::v16i16, 3}, // vpshuflw + vpshufd + vinsertf128 + {TTI::SK_Broadcast, MVT::v16f16, 3}, // vpshuflw + vpshufd + vinsertf128 {TTI::SK_Broadcast, MVT::v32i8, 2}, // vpshufb + vinsertf128 {TTI::SK_Reverse, MVT::v4f64, 2}, // vperm2f128 + vpermilpd @@ -1501,6 +1496,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Reverse, MVT::v8i32, 2}, // vperm2f128 + vpermilps {TTI::SK_Reverse, MVT::v16i16, 4}, // vextractf128 + 2*pshufb // + vinsertf128 + {TTI::SK_Reverse, MVT::v16f16, 4}, // vextractf128 + 2*pshufb + // + vinsertf128 {TTI::SK_Reverse, MVT::v32i8, 4}, // vextractf128 + 2*pshufb // + vinsertf128 @@ -1509,6 +1506,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v8i32, 1}, // vblendps {TTI::SK_Select, MVT::v8f32, 1}, // vblendps {TTI::SK_Select, MVT::v16i16, 3}, // vpand + vpandn + vpor + {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd @@ -1517,6 +1515,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, // vextractf128 + 4*pshufb // + 2*por + vinsertf128 + {TTI::SK_PermuteSingleSrc, MVT::v16f16, 8}, // vextractf128 + 4*pshufb + // + 2*por + vinsertf128 {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, // vextractf128 + 4*pshufb // + 2*por + vinsertf128 
@@ -1526,6 +1526,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, // 2*vextractf128 + 8*pshufb // + 4*por + vinsertf128 + {TTI::SK_PermuteTwoSrc, MVT::v16f16, 15}, // 2*vextractf128 + 8*pshufb + // + 4*por + vinsertf128 {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, // 2*vextractf128 + 8*pshufb // + 4*por + vinsertf128 }; @@ -1540,6 +1542,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v4i32, 1}, // pblendw {TTI::SK_Select, MVT::v4f32, 1}, // blendps {TTI::SK_Select, MVT::v8i16, 1}, // pblendw + {TTI::SK_Select, MVT::v8f16, 1}, // pblendw {TTI::SK_Select, MVT::v16i8, 1} // pblendvb }; @@ -1549,18 +1552,23 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, static const CostTblEntry SSSE3ShuffleTbl[] = { {TTI::SK_Broadcast, MVT::v8i16, 1}, // pshufb + {TTI::SK_Broadcast, MVT::v8f16, 1}, // pshufb {TTI::SK_Broadcast, MVT::v16i8, 1}, // pshufb {TTI::SK_Reverse, MVT::v8i16, 1}, // pshufb + {TTI::SK_Reverse, MVT::v8f16, 1}, // pshufb {TTI::SK_Reverse, MVT::v16i8, 1}, // pshufb {TTI::SK_Select, MVT::v8i16, 3}, // 2*pshufb + por + {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb + {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, // 2*pshufb + por + {TTI::SK_PermuteTwoSrc, MVT::v8f16, 3}, // 2*pshufb + por {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, // 2*pshufb + por }; @@ -1573,12 +1581,14 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Broadcast, MVT::v2i64, 1}, // pshufd {TTI::SK_Broadcast, MVT::v4i32, 1}, // pshufd {TTI::SK_Broadcast, MVT::v8i16, 2}, // pshuflw + pshufd + {TTI::SK_Broadcast, MVT::v8f16, 2}, // pshuflw + pshufd {TTI::SK_Broadcast, MVT::v16i8, 
3}, // unpck + pshuflw + pshufd {TTI::SK_Reverse, MVT::v2f64, 1}, // shufpd {TTI::SK_Reverse, MVT::v2i64, 1}, // pshufd {TTI::SK_Reverse, MVT::v4i32, 1}, // pshufd {TTI::SK_Reverse, MVT::v8i16, 3}, // pshuflw + pshufhw + pshufd + {TTI::SK_Reverse, MVT::v8f16, 3}, // pshuflw + pshufhw + pshufd {TTI::SK_Reverse, MVT::v16i8, 9}, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus @@ -1586,6 +1596,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_Select, MVT::v2f64, 1}, // movsd {TTI::SK_Select, MVT::v4i32, 2}, // 2*shufps {TTI::SK_Select, MVT::v8i16, 3}, // pand + pandn + por + {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd @@ -1593,6 +1604,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, // 2*pshuflw + 2*pshufhw // + pshufd/unpck + {TTI::SK_PermuteSingleSrc, MVT::v8f16, 5}, // 2*pshuflw + 2*pshufhw + // + pshufd/unpck { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + 2*packus @@ -1600,6 +1613,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd} { TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, // blend+permute + { TTI::SK_PermuteTwoSrc, MVT::v8f16, 8 }, // blend+permute { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, // blend+permute }; @@ -5219,7 +5233,7 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) { if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) return true; - if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16()) + if (ScalarTy->isHalfTy() && ST->hasBWI()) return true; if (!ScalarTy->isIntegerTy()) @@ -5674,8 +5688,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( if (EltTy->isFloatTy() 
|| EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || EltTy->isIntegerTy(32) || EltTy->isPointerTy()) return true; - if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || - (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy())) + if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || EltTy->isHalfTy()) return HasBW; return false; }; diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 19ebcb3ea3e8..2fb06e29bf3b 100644 --- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -27,7 +27,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" -#include <algorithm> // std::sort +#include <algorithm> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/Options.td b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/Options.td index 9d969b040ef2..4af250e8ad73 100644 --- a/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/Options.td +++ b/contrib/llvm-project/llvm/lib/ToolDrivers/llvm-lib/Options.td @@ -48,3 +48,4 @@ def nodefaultlib: P<"nodefaultlib", "">; def nodefaultlib_all: F<"nodefaultlib">; def nologo : F<"nologo">; def subsystem : P<"subsystem", "">; +def verbose : F<"verbose">; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index f7bbdcffd2ec..81b43a2ab2c2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -112,8 +112,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) { return coro::declaresIntrinsics( M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon", - "llvm.coro.id.retcon.once", 
"llvm.coro.async.size.replace", - "llvm.coro.async.resume"}); + "llvm.coro.id.async", "llvm.coro.id.retcon.once", + "llvm.coro.async.size.replace", "llvm.coro.async.resume"}); } PreservedAnalyses CoroCleanupPass::run(Module &M, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h index 5557370c82ba..af35b45c2eaf 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -17,8 +17,6 @@ namespace llvm { class CallGraph; -class CallGraphSCC; -class PassRegistry; namespace coro { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 9c1b247cdb39..722a1c6ec0ce 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1961,6 +1961,13 @@ static coro::Shape splitCoroutine(Function &F, return Shape; } +/// Remove calls to llvm.coro.end in the original function. 
+static void removeCoroEnds(const coro::Shape &Shape) { + for (auto End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + } +} + static void updateCallGraphAfterCoroutineSplit( LazyCallGraph::Node &N, const coro::Shape &Shape, const SmallVectorImpl<Function *> &Clones, LazyCallGraph::SCC &C, @@ -1969,10 +1976,14 @@ static void updateCallGraphAfterCoroutineSplit( if (!Shape.CoroBegin) return; - for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); + if (Shape.ABI != coro::ABI::Switch) + removeCoroEnds(Shape); + else { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } } if (!Clones.empty()) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp index e5ff98e4f73f..37c773bd47d6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp @@ -326,7 +326,7 @@ static bool getPotentialCopiesOfMemoryValue( << " (only exact: " << OnlyExact << ")\n";); Value &Ptr = *I.getPointerOperand(); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, UsedAssumedInformation)) { LLVM_DEBUG( @@ -343,6 +343,7 @@ static bool getPotentialCopiesOfMemoryValue( const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); + LLVM_DEBUG(dbgs() << "Visit " << Objects.size() << " objects:\n"); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa<UndefValue>(Obj)) @@ -352,8 +353,8 @@ static bool getPotentialCopiesOfMemoryValue( // be OK. We do not try to optimize the latter. 
if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && - A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == - Obj) + A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation, + AA::Interprocedural) == Obj) continue; LLVM_DEBUG( dbgs() << "Underlying object is a valid nullptr, giving up.\n";); @@ -375,25 +376,37 @@ static bool getPotentialCopiesOfMemoryValue( return false; } - if (IsLoad) { - Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); - if (!InitialValue) - return false; - NewCopies.push_back(InitialValue); - NewCopyOrigins.push_back(nullptr); - } + bool NullOnly = true; + bool NullRequired = false; + auto CheckForNullOnlyAndUndef = [&](Optional<Value *> V, bool IsExact) { + if (!V || *V == nullptr) + NullOnly = false; + else if (isa<UndefValue>(*V)) + /* No op */; + else if (isa<Constant>(*V) && cast<Constant>(*V)->isNullValue()) + NullRequired = !IsExact; + else + NullOnly = false; + }; auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) return true; if (IsLoad && Acc.isWrittenValueYetUndetermined()) return true; - if (OnlyExact && !IsExact && + CheckForNullOnlyAndUndef(Acc.getContent(), IsExact); + if (OnlyExact && !IsExact && !NullOnly && !isa_and_nonnull<UndefValue>(Acc.getWrittenValue())) { LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() << ", abort!\n"); return false; } + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Required all `null` accesses due to non exact " + "one, however found non-null one: " + << *Acc.getRemoteInst() << ", abort!\n"); + return false; + } if (IsLoad) { assert(isa<LoadInst>(I) && "Expected load or store instruction only!"); if (!Acc.isWrittenValueUnknown()) { @@ -424,15 +437,36 @@ static bool getPotentialCopiesOfMemoryValue( return true; }; + // If the value has been written to we don't need the initial value of the + // object. 
+ bool HasBeenWrittenTo = false; + auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, + HasBeenWrittenTo)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " << *Obj << "\n"); return false; } + + if (IsLoad && !HasBeenWrittenTo) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + CheckForNullOnlyAndUndef(InitialValue, /* IsExact */ true); + if (NullRequired && !NullOnly) { + LLVM_DEBUG(dbgs() << "Non exact access but initial value that is not " + "null or undef, abort!\n"); + return false; + } + + NewCopies.push_back(InitialValue); + NewCopyOrigins.push_back(nullptr); + } + PIs.push_back(&PI); } @@ -520,12 +554,21 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. 
+ if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " << FromI + << " is not checked backwards, abort\n"); + return true; + } + SmallPtrSet<const Instruction *, 8> Visited; SmallVector<const Instruction *> Worklist; Worklist.push_back(&FromI); - const auto &NoRecurseAA = A.getAAFor<AANoRecurse>( - QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); while (!Worklist.empty()) { const Instruction *CurFromI = Worklist.pop_back_val(); if (!Visited.insert(CurFromI).second) @@ -545,26 +588,13 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, << *ToI << " [Intra]\n"); if (Result) return true; - if (NoRecurseAA.isAssumedNoRecurse()) - continue; - } - - // TODO: If we can go arbitrarily backwards we will eventually reach an - // entry point that can reach ToI. Only once this takes a set of blocks - // through which we cannot go, or once we track internal functions not - // accessible from the outside, it makes sense to perform backwards analysis - // in the absence of a GoBackwardsCB. - if (!GoBackwardsCB) { - LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " - << *CurFromI << " is not checked backwards, abort\n"); - return true; } // Check if the current instruction is already known to reach the ToFn. const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>( QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); bool Result = FnReachabilityAA.instructionCanReach( - A, *CurFromI, ToFn, /* UseBackwards */ false); + A, *CurFromI, ToFn); LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() << " " << (Result ? 
"can potentially " : "cannot ") << "reach @" << ToFn.getName() << " [FromFn]\n"); @@ -1038,60 +1068,74 @@ Attributor::getAssumedConstant(const IRPosition &IRP, } if (auto *C = dyn_cast<Constant>(&IRP.getAssociatedValue())) return C; - const auto &ValueSimplifyAA = - getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return llvm::None; - } - if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) { - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return UndefValue::get(IRP.getAssociatedType()); + SmallVector<AA::ValueAndContext> Values; + if (getAssumedSimplifiedValues(IRP, &AA, Values, + AA::ValueScope::Interprocedural, + UsedAssumedInformation)) { + if (Values.empty()) + return llvm::None; + if (auto *C = dyn_cast_or_null<Constant>( + AAPotentialValues::getSingleValue(*this, AA, IRP, Values))) + return C; } - Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value()); - if (CI) - CI = dyn_cast_or_null<Constant>( - AA::getWithType(*CI, *IRP.getAssociatedType())); - if (CI) - recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL); - return CI; + return nullptr; } -Optional<Value *> -Attributor::getAssumedSimplified(const IRPosition &IRP, - const AbstractAttribute *AA, - bool &UsedAssumedInformation) { +Optional<Value *> Attributor::getAssumedSimplified(const IRPosition &IRP, + const AbstractAttribute *AA, + bool &UsedAssumedInformation, + AA::ValueScope S) { // First check all callbacks provided by outside AAs. If any of them returns // a non-null value that is different from the associated value, or None, we // assume it's simplified. 
for (auto &CB : SimplificationCallbacks.lookup(IRP)) return CB(IRP, AA, UsedAssumedInformation); - // If no high-level/outside simplification occurred, use AAValueSimplify. - const auto &ValueSimplifyAA = - getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE); - Optional<Value *> SimplifiedV = - ValueSimplifyAA.getAssumedSimplifiedValue(*this); - bool IsKnown = ValueSimplifyAA.isAtFixpoint(); - UsedAssumedInformation |= !IsKnown; - if (!SimplifiedV) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); + SmallVector<AA::ValueAndContext> Values; + if (!getAssumedSimplifiedValues(IRP, AA, Values, S, UsedAssumedInformation)) + return &IRP.getAssociatedValue(); + if (Values.empty()) return llvm::None; + if (AA) + if (Value *V = AAPotentialValues::getSingleValue(*this, *AA, IRP, Values)) + return V; + if (IRP.getPositionKind() == IRPosition::IRP_RETURNED || + IRP.getPositionKind() == IRPosition::IRP_CALL_SITE_RETURNED) + return nullptr; + return &IRP.getAssociatedValue(); +} + +bool Attributor::getAssumedSimplifiedValues( + const IRPosition &IRP, const AbstractAttribute *AA, + SmallVectorImpl<AA::ValueAndContext> &Values, AA::ValueScope S, + bool &UsedAssumedInformation) { + // First check all callbacks provided by outside AAs. If any of them returns + // a non-null value that is different from the associated value, or None, we + // assume it's simplified. 
+ const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP); + for (auto &CB : SimplificationCBs) { + Optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation); + if (!CBResult.has_value()) + continue; + Value *V = CBResult.value(); + if (!V) + return false; + if ((S & AA::ValueScope::Interprocedural) || + AA::isValidInScope(*V, IRP.getAnchorScope())) + Values.push_back(AA::ValueAndContext{*V, nullptr}); + else + return false; } - if (*SimplifiedV == nullptr) - return const_cast<Value *>(&IRP.getAssociatedValue()); - if (Value *SimpleV = - AA::getWithType(**SimplifiedV, *IRP.getAssociatedType())) { - if (AA) - recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL); - return SimpleV; - } - return const_cast<Value *>(&IRP.getAssociatedValue()); + if (!SimplificationCBs.empty()) + return true; + + // If no high-level/outside simplification occurred, use AAPotentialValues. + const auto &PotentialValuesAA = + getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL); + if (!PotentialValuesAA.getAssumedSimplifiedValues(*this, Values, S)) + return false; + UsedAssumedInformation |= !PotentialValuesAA.isAtFixpoint(); + return true; } Optional<Value *> Attributor::translateArgumentToCallSiteContent( @@ -1106,7 +1150,7 @@ Optional<Value *> Attributor::translateArgumentToCallSiteContent( if (!Arg->hasPointeeInMemoryValueAttr()) return getAssumedSimplified( IRPosition::callsite_argument(CB, Arg->getArgNo()), AA, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); return nullptr; } @@ -1295,8 +1339,21 @@ bool Attributor::checkForAllUses( SmallVector<const Use *, 16> Worklist; SmallPtrSet<const Use *, 16> Visited; - for (const Use &U : V.uses()) - Worklist.push_back(&U); + auto AddUsers = [&](const Value &V, const Use *OldUse) { + for (const Use &UU : V.uses()) { + if (OldUse && EquivalentUseCB && !EquivalentUseCB(*OldUse, UU)) { + LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " + "rejected by the equivalence call 
back: " + << *UU << "!\n"); + return false; + } + + Worklist.push_back(&UU); + } + return true; + }; + + AddUsers(V, /* OldUse */ nullptr); LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size() << " initial uses to check\n"); @@ -1342,15 +1399,8 @@ bool Attributor::checkForAllUses( << PotentialCopies.size() << " potential copies instead!\n"); for (Value *PotentialCopy : PotentialCopies) - for (const Use &CopyUse : PotentialCopy->uses()) { - if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) { - LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was " - "rejected by the equivalence call back: " - << *CopyUse << "!\n"); - return false; - } - Worklist.push_back(&CopyUse); - } + if (!AddUsers(*PotentialCopy, U)) + return false; continue; } } @@ -1361,8 +1411,25 @@ bool Attributor::checkForAllUses( return false; if (!Follow) continue; - for (const Use &UU : U->getUser()->uses()) - Worklist.push_back(&UU); + + User &Usr = *U->getUser(); + AddUsers(Usr, /* OldUse */ nullptr); + + auto *RI = dyn_cast<ReturnInst>(&Usr); + if (!RI) + continue; + + Function &F = *RI->getFunction(); + auto CallSitePred = [&](AbstractCallSite ACS) { + return AddUsers(*ACS.getInstruction(), U); + }; + if (!checkForAllCallSites(CallSitePred, F, /* RequireAllCallSites */ true, + &QueryingAA, UsedAssumedInformation)) { + LLVM_DEBUG(dbgs() << "[Attributor] Could not follow return instruction " + "to all call sites: " + << *RI << "\n"); + return false; + } } return true; @@ -1918,7 +1985,8 @@ ChangeStatus Attributor::cleanupIR() { << ToBeDeletedInsts.size() << " instructions and " << ToBeChangedValues.size() << " values and " << ToBeChangedUses.size() << " uses. To insert " - << ToBeChangedToUnreachableInsts.size() << " unreachables." 
+ << ToBeChangedToUnreachableInsts.size() + << " unreachables.\n" << "Preserve manifest added " << ManifestAddedBlocks.size() << " blocks\n"); @@ -2046,6 +2114,8 @@ ChangeStatus Attributor::cleanupIR() { } for (auto &V : ToBeChangedToUnreachableInsts) if (Instruction *I = dyn_cast_or_null<Instruction>(V)) { + LLVM_DEBUG(dbgs() << "[Attributor] Change to unreachable: " << *I + << "\n"); assert(isRunOn(*I->getFunction()) && "Cannot replace an instruction outside the current SCC!"); CGModifiedFunctions.insert(I->getFunction()); @@ -2877,7 +2947,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be simplified. bool UsedAssumedInformation = false; - getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every returned value might be marked noundef. getOrCreateAAFor<AANoUndef>(RetPos); @@ -2906,7 +2977,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // interface though as outside AAs can register custom simplification // callbacks. bool UsedAssumedInformation = false; - getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every argument might be dead. getOrCreateAAFor<AAIsDead>(ArgPos); @@ -2970,7 +3042,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition CBRetPos = IRPosition::callsite_returned(CB); bool UsedAssumedInformation = false; - getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation); + getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation, + AA::Intraprocedural); } for (int I = 0, E = CB.arg_size(); I < E; ++I) { @@ -2984,7 +3057,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Attributor interface though as outside AAs can register custom // simplification callbacks. 
bool UsedAssumedInformation = false; - getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation); + getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation, + AA::Intraprocedural); // Every call site argument might be marked "noundef". getOrCreateAAFor<AANoUndef>(CBArgPos); @@ -3034,12 +3108,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { IRPosition::value(*cast<LoadInst>(I).getPointerOperand())); if (SimplifyAllLoads) getAssumedSimplified(IRPosition::value(I), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); } else { auto &SI = cast<StoreInst>(I); getOrCreateAAFor<AAIsDead>(IRPosition::inst(I)); getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Intraprocedural); getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand())); } return true; @@ -3126,6 +3200,26 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, return OS; } +raw_ostream &llvm::operator<<(raw_ostream &OS, + const PotentialLLVMValuesState &S) { + OS << "set-state(< {"; + if (!S.isValidState()) + OS << "full-set"; + else { + for (auto &It : S.getAssumedSet()) { + if (auto *F = dyn_cast<Function>(It.first.getValue())) + OS << "@" << F->getName() << "[" << int(It.second) << "], "; + else + OS << *It.first.getValue() << "[" << int(It.second) << "], "; + } + if (S.undefIsContained()) + OS << "undef "; + } + OS << "} >)"; + + return OS; +} + void AbstractAttribute::print(raw_ostream &OS) const { OS << "["; OS << getName(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 1ff54b78e27e..660ff3ee9563 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -14,12 +14,14 @@ #include "llvm/Transforms/IPO/Attributor.h" #include 
"llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" @@ -35,11 +37,13 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -72,6 +76,8 @@ static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), template <> unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0; +template <> unsigned llvm::PotentialLLVMValuesState::MaxPotentialValues = -1; + static cl::opt<unsigned, true> MaxPotentialValues( "attributor-max-potential-values", cl::Hidden, cl::desc("Maximum number of potential values to be " @@ -79,6 +85,12 @@ static cl::opt<unsigned, true> MaxPotentialValues( cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); +static cl::opt<int> MaxPotentialValuesIterations( + "attributor-max-potential-values-iterations", cl::Hidden, + cl::desc( + "Maximum number of iterations we keep dismantling potential values."), + cl::init(64)); + static cl::opt<unsigned> MaxInterferingAccesses( "attributor-max-interfering-accesses", cl::Hidden, cl::desc("Maximum number of interfering accesses to " @@ -162,6 +174,7 @@ PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) PIPE_OPERATOR(AAPotentialConstantValues) +PIPE_OPERATOR(AAPotentialValues) 
PIPE_OPERATOR(AANoUndef) PIPE_OPERATOR(AACallEdges) PIPE_OPERATOR(AAFunctionReachability) @@ -293,228 +306,35 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr, return Ptr; } -/// Recursively visit all values that might become \p IRP at some point. This -/// will be done by looking through cast instructions, selects, phis, and calls -/// with the "returned" attribute. Once we cannot look through the value any -/// further, the callback \p VisitValueCB is invoked and passed the current -/// value, the \p State, and a flag to indicate if we stripped anything. -/// Stripped means that we unpacked the value associated with \p IRP at least -/// once. Note that the value used for the callback may still be the value -/// associated with \p IRP (due to PHIs). To limit how much effort is invested, -/// we will never visit more values than specified by \p MaxValues. -/// If \p VS does not contain the Interprocedural bit, only values valid in the -/// scope of \p CtxI will be visited and simplification into other scopes is -/// prevented. 
-template <typename StateTy> -static bool genericValueTraversal( - Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA, - StateTy &State, - function_ref<bool(Value &, const Instruction *, StateTy &, bool)> - VisitValueCB, - const Instruction *CtxI, bool &UsedAssumedInformation, - bool UseValueSimplify = true, int MaxValues = 16, - function_ref<Value *(Value *)> StripCB = nullptr, - AA::ValueScope VS = AA::Interprocedural) { - - struct LivenessInfo { - const AAIsDead *LivenessAA = nullptr; - bool AnyDead = false; - }; - SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; - auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { - LivenessInfo &LI = LivenessAAs[&F]; - if (!LI.LivenessAA) - LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F), - DepClassTy::NONE); - return LI; - }; - - Value *InitialV = &IRP.getAssociatedValue(); - using Item = std::pair<Value *, const Instruction *>; - SmallSet<Item, 16> Visited; - SmallVector<Item, 16> Worklist; - Worklist.push_back({InitialV, CtxI}); - - int Iteration = 0; - do { - Item I = Worklist.pop_back_val(); - Value *V = I.first; - CtxI = I.second; - if (StripCB) - V = StripCB(V); - - // Check if we should process the current value. To prevent endless - // recursion keep a record of the values we followed! - if (!Visited.insert(I).second) - continue; - - // Make sure we limit the compile time for complex expressions. - if (Iteration++ >= MaxValues) { - LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " - << Iteration << "!\n"); - return false; - } - - // Explicitly look through calls with a "returned" attribute if we do - // not have a pointer as stripPointerCasts only works on them. 
- Value *NewV = nullptr; - if (V->getType()->isPointerTy()) { - NewV = V->stripPointerCasts(); - } else { - auto *CB = dyn_cast<CallBase>(V); - if (CB && CB->getCalledFunction()) { - for (Argument &Arg : CB->getCalledFunction()->args()) - if (Arg.hasReturnedAttr()) { - NewV = CB->getArgOperand(Arg.getArgNo()); - break; - } - } - } - if (NewV && NewV != V) { - Worklist.push_back({NewV, CtxI}); - continue; - } - - // Look through select instructions, visit assumed potential values. - if (auto *SI = dyn_cast<SelectInst>(V)) { - Optional<Constant *> C = A.getAssumedConstant( - *SI->getCondition(), QueryingAA, UsedAssumedInformation); - bool NoValueYet = !C; - if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) - continue; - if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { - if (CI->isZero()) - Worklist.push_back({SI->getFalseValue(), CtxI}); - else - Worklist.push_back({SI->getTrueValue(), CtxI}); - continue; - } - // We could not simplify the condition, assume both values.( - Worklist.push_back({SI->getTrueValue(), CtxI}); - Worklist.push_back({SI->getFalseValue(), CtxI}); - continue; - } - - // Look through phi nodes, visit all live operands. 
- if (auto *PHI = dyn_cast<PHINode>(V)) { - LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction()); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - BasicBlock *IncomingBB = PHI->getIncomingBlock(u); - if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) { - LI.AnyDead = true; - UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint(); - continue; - } - Worklist.push_back( - {PHI->getIncomingValue(u), IncomingBB->getTerminator()}); - } - continue; - } - - if (auto *Arg = dyn_cast<Argument>(V)) { - if ((VS & AA::Interprocedural) && !Arg->hasPassPointeeByValueCopyAttr()) { - SmallVector<Item> CallSiteValues; - bool UsedAssumedInformation = false; - if (A.checkForAllCallSites( - [&](AbstractCallSite ACS) { - // Callbacks might not have a corresponding call site operand, - // stick with the argument in that case. - Value *CSOp = ACS.getCallArgOperand(*Arg); - if (!CSOp) - return false; - CallSiteValues.push_back({CSOp, ACS.getInstruction()}); - return true; - }, - *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) { - Worklist.append(CallSiteValues); - continue; - } - } - } - - if (UseValueSimplify && !isa<Constant>(V)) { - Optional<Value *> SimpleV = - A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) - continue; - Value *NewV = SimpleV.value(); - if (NewV && NewV != V) { - if ((VS & AA::Interprocedural) || !CtxI || - AA::isValidInScope(*NewV, CtxI->getFunction())) { - Worklist.push_back({NewV, CtxI}); - continue; - } - } - } - - if (auto *LI = dyn_cast<LoadInst>(V)) { - bool UsedAssumedInformation = false; - // If we ask for the potentially loaded values from the initial pointer we - // will simply end up here again. The load is as far as we can make it. 
- if (LI->getPointerOperand() != InitialV) { - SmallSetVector<Value *, 4> PotentialCopies; - SmallSetVector<Instruction *, 4> PotentialValueOrigins; - if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, - PotentialValueOrigins, QueryingAA, - UsedAssumedInformation, - /* OnlyExact */ true)) { - // Values have to be dynamically unique or we loose the fact that a - // single llvm::Value might represent two runtime values (e.g., stack - // locations in different recursive calls). - bool DynamicallyUnique = - llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { - return AA::isDynamicallyUnique(A, QueryingAA, *PC); - }); - if (DynamicallyUnique && - ((VS & AA::Interprocedural) || !CtxI || - llvm::all_of(PotentialCopies, [CtxI](Value *PC) { - return AA::isValidInScope(*PC, CtxI->getFunction()); - }))) { - for (auto *PotentialCopy : PotentialCopies) - Worklist.push_back({PotentialCopy, CtxI}); - continue; - } - } - } - } - - // Once a leaf is reached we inform the user through the callback. - if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { - LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " - << *V << "!\n"); - return false; - } - } while (!Worklist.empty()); - - // If we actually used liveness information so we have to record a dependence. - for (auto &It : LivenessAAs) - if (It.second.AnyDead) - A.recordDependence(*It.second.LivenessAA, QueryingAA, - DepClassTy::OPTIONAL); - - // All values have been visited. 
- return true; -} - bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, - SmallVectorImpl<Value *> &Objects, + SmallSetVector<Value *, 8> &Objects, const AbstractAttribute &QueryingAA, const Instruction *CtxI, bool &UsedAssumedInformation, - AA::ValueScope VS) { - auto StripCB = [&](Value *V) { return getUnderlyingObject(V); }; - SmallPtrSet<Value *, 8> SeenObjects; - auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *, - SmallVectorImpl<Value *> &Objects, - bool) -> bool { - if (SeenObjects.insert(&Val).second) - Objects.push_back(&Val); + AA::ValueScope S, + SmallPtrSetImpl<Value *> *SeenObjects) { + SmallPtrSet<Value *, 8> LocalSeenObjects; + if (!SeenObjects) + SeenObjects = &LocalSeenObjects; + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::value(Ptr), &QueryingAA, Values, + S, UsedAssumedInformation)) { + Objects.insert(const_cast<Value *>(&Ptr)); return true; - }; - if (!genericValueTraversal<decltype(Objects)>( - A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI, - UsedAssumedInformation, true, 32, StripCB, VS)) - return false; + } + + for (auto &VAC : Values) { + Value *UO = getUnderlyingObject(VAC.getValue()); + if (UO && UO != VAC.getValue() && SeenObjects->insert(UO).second) { + if (!getAssumedUnderlyingObjects(A, *UO, Objects, QueryingAA, + VAC.getCtxI(), UsedAssumedInformation, S, + SeenObjects)) + return false; + continue; + } + Objects.insert(VAC.getValue()); + } return true; } @@ -1122,9 +942,6 @@ struct AAPointerInfoImpl using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>; AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { AAPointerInfo::initialize(A); } - /// See AbstractAttribute::getAsStr(). 
const std::string getAsStr() const override { return std::string("PointerInfo ") + @@ -1144,9 +961,14 @@ struct AAPointerInfoImpl const override { return State::forallInterferingAccesses(OAS, CB); } - bool forallInterferingAccesses( - Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, - function_ref<bool(const Access &, bool)> UserCB) const override { + + bool + forallInterferingAccesses(Attributor &A, const AbstractAttribute &QueryingAA, + Instruction &I, + function_ref<bool(const Access &, bool)> UserCB, + bool &HasBeenWrittenTo) const override { + HasBeenWrittenTo = false; + SmallPtrSet<const Access *, 8> DominatingWrites; SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses; @@ -1182,14 +1004,12 @@ struct AAPointerInfoImpl const bool FindInterferingWrites = I.mayReadFromMemory(); const bool FindInterferingReads = I.mayWriteToMemory(); - const bool UseDominanceReasoning = FindInterferingWrites; + const bool UseDominanceReasoning = + FindInterferingWrites && NoRecurseAA.isKnownNoRecurse(); const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning - ? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>( - Scope) - : nullptr; + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope); enum GPUAddressSpace : unsigned { Generic = 0, @@ -1246,22 +1066,17 @@ struct AAPointerInfoImpl (!FindInterferingReads || !Acc.isRead())) return true; + bool Dominates = DT && Exact && Acc.isMustAccess() && + (Acc.getLocalInst()->getFunction() == &Scope) && + DT->dominates(Acc.getRemoteInst(), &I); + if (FindInterferingWrites && Dominates) + HasBeenWrittenTo = true; + // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. 
- if (CanUseCFGResoning) { - if ((!Acc.isWrite() || - !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, - IsLiveInCalleeCB)) && - (!Acc.isRead() || - !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, - IsLiveInCalleeCB))) - return true; - if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && - IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &I)) - DominatingWrites.insert(&Acc); - } - } + if (CanUseCFGResoning && Dominates && UseDominanceReasoning && + IsSameThreadAsLoad(Acc)) + DominatingWrites.insert(&Acc); InterferingAccesses.push_back({&Acc, Exact}); return true; @@ -1269,19 +1084,27 @@ struct AAPointerInfoImpl if (!State::forallInterferingAccesses(I, AccessCB)) return false; - // If we cannot use CFG reasoning we only filter the non-write accesses - // and are done here. - if (!CanUseCFGResoning) { - for (auto &It : InterferingAccesses) - if (!UserCB(*It.first, It.second)) - return false; - return true; + if (HasBeenWrittenTo) { + const Function *ScopePtr = &Scope; + IsLiveInCalleeCB = [ScopePtr](const Function &Fn) { + return ScopePtr != &Fn; + }; } // Helper to determine if we can skip a specific write access. This is in // the worst case quadratic as we are looking for another write that will // hide the effect of this one. auto CanSkipAccess = [&](const Access &Acc, bool Exact) { + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) + return true; + + if (!DT || !UseDominanceReasoning) + return false; if (!IsSameThreadAsLoad(Acc)) return false; if (!DominatingWrites.count(&Acc)) @@ -1303,7 +1126,7 @@ struct AAPointerInfoImpl // succeeded for all or not. 
unsigned NumInterferingAccesses = InterferingAccesses.size(); for (auto &It : InterferingAccesses) { - if (!DT || NumInterferingAccesses > MaxInterferingAccesses || + if (NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; @@ -1339,8 +1162,9 @@ struct AAPointerInfoImpl if (FromCallee) { Content = A.translateArgumentToCallSiteContent( RAcc.getContent(), CB, *this, UsedAssumedInformation); - AK = AccessKind( - AK & (IsByval ? AccessKind::AK_READ : AccessKind::AK_READ_WRITE)); + AK = + AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); + AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } Changed = Changed | addAccess(A, OAS.getOffset(), OAS.getSize(), CB, Content, @@ -1353,6 +1177,27 @@ struct AAPointerInfoImpl /// Statistic tracking for all AAPointerInfo implementations. /// See AbstractAttribute::trackStatistics(). void trackPointerInfoStatistics(const IRPosition &IRP) const {} + + /// Dump the state into \p O. + void dumpState(raw_ostream &O) { + for (auto &It : AccessBins) { + O << "[" << It.first.getOffset() << "-" + << It.first.getOffset() + It.first.getSize() + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { + O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; + if (Acc.getLocalInst() != Acc.getRemoteInst()) + O << " --> " << *Acc.getRemoteInst() + << "\n"; + if (!Acc.isWrittenValueYetUndetermined()) { + if (Acc.getWrittenValue()) + O << " - c: " << *Acc.getWrittenValue() << "\n"; + else + O << " - c: <unknown>\n"; + } + } + } + } }; struct AAPointerInfoFloating : public AAPointerInfoImpl { @@ -1360,9 +1205,6 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { AAPointerInfoFloating(const IRPosition &IRP, Attributor &A) : AAPointerInfoImpl(IRP, A) {} - /// See AbstractAttribute::initialize(...). 
- void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); } - /// Deal with an access and signal if it was handled successfully. bool handleAccess(Attributor &A, Instruction &I, Value &Ptr, Optional<Value *> Content, AccessKind Kind, int64_t Offset, @@ -1460,7 +1302,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Follow = true; return true; } - if (isa<CastInst>(Usr) || isa<SelectInst>(Usr)) + if (isa<CastInst>(Usr) || isa<SelectInst>(Usr) || isa<ReturnInst>(Usr)) return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow); // For PHIs we need to take care of the recurrence explicitly as the value @@ -1469,6 +1311,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { if (isa<PHINode>(Usr)) { // Note the order here, the Usr access might change the map, CurPtr is // already in it though. + bool IsFirstPHIUser = !OffsetInfoMap.count(Usr); OffsetInfo &UsrOI = OffsetInfoMap[Usr]; OffsetInfo &PtrOI = OffsetInfoMap[CurPtr]; // Check if the PHI is invariant (so far). @@ -1484,52 +1327,69 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { } // Check if the PHI operand is not dependent on the PHI itself. - // TODO: This is not great as we look at the pointer type. However, it - // is unclear where the Offset size comes from with typeless pointers. 
APInt Offset( DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()), 0); - if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets( - DL, Offset, /* AllowNonInbounds */ true)) { - if (Offset != PtrOI.Offset) { - LLVM_DEBUG(dbgs() - << "[AAPointerInfo] PHI operand pointer offset mismatch " - << *CurPtr << " in " << *Usr << "\n"); - return false; - } - return HandlePassthroughUser(Usr, PtrOI, Follow); + Value *CurPtrBase = CurPtr->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true); + auto It = OffsetInfoMap.find(CurPtrBase); + if (It != OffsetInfoMap.end()) { + Offset += It->getSecond().Offset; + if (IsFirstPHIUser || Offset == UsrOI.Offset) + return HandlePassthroughUser(Usr, PtrOI, Follow); + LLVM_DEBUG(dbgs() + << "[AAPointerInfo] PHI operand pointer offset mismatch " + << *CurPtr << " in " << *Usr << "\n"); + } else { + LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " + << *CurPtr << " in " << *Usr << "\n"); } // TODO: Approximate in case we know the direction of the recurrence. - LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex " - << *CurPtr << " in " << *Usr << "\n"); UsrOI = PtrOI; UsrOI.Offset = OffsetAndSize::Unknown; Follow = true; return true; } - if (auto *LoadI = dyn_cast<LoadInst>(Usr)) - return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, - AccessKind::AK_READ, OffsetInfoMap[CurPtr].Offset, - Changed, LoadI->getType()); + if (auto *LoadI = dyn_cast<LoadInst>(Usr)) { + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be read. 
+ AccessKind AK = AccessKind::AK_R; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); + return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr, AK, + OffsetInfoMap[CurPtr].Offset, Changed, + LoadI->getType()); + } + if (auto *StoreI = dyn_cast<StoreInst>(Usr)) { if (StoreI->getValueOperand() == CurPtr) { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store " << *StoreI << "\n"); return false; } + // If the access is to a pointer that may or may not be the associated + // value, e.g. due to a PHI, we cannot assume it will be written. + AccessKind AK = AccessKind::AK_W; + if (getUnderlyingObject(CurPtr) == &AssociatedValue) + AK = AccessKind(AK | AccessKind::AK_MUST); + else + AK = AccessKind(AK | AccessKind::AK_MAY); bool UsedAssumedInformation = false; - Optional<Value *> Content = A.getAssumedSimplified( - *StoreI->getValueOperand(), *this, UsedAssumedInformation); - return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE, + Optional<Value *> Content = + A.getAssumedSimplified(*StoreI->getValueOperand(), *this, + UsedAssumedInformation, AA::Interprocedural); + return handleAccess(A, *StoreI, *CurPtr, Content, AK, OffsetInfoMap[CurPtr].Offset, Changed, StoreI->getValueOperand()->getType()); } if (auto *CB = dyn_cast<CallBase>(Usr)) { if (CB->isLifetimeStartOrEnd()) return true; - if (TLI && isFreeCall(CB, TLI)) + if (getFreedOperand(CB, TLI) == U) return true; if (CB->isArgOperand(&U)) { unsigned ArgNo = CB->getArgOperandNo(&U); @@ -1539,7 +1399,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) | Changed; - return true; + return isValidState(); } LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB << "\n"); @@ -1551,36 +1411,30 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { return false; }; auto EquivalentUseCB = 
[&](const Use &OldU, const Use &NewU) { - if (OffsetInfoMap.count(NewU)) + if (OffsetInfoMap.count(NewU)) { + LLVM_DEBUG({ + if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) { + dbgs() << "[AAPointerInfo] Equivalent use callback failed: " + << OffsetInfoMap[NewU].Offset << " vs " + << OffsetInfoMap[OldU].Offset << "\n"; + } + }); return OffsetInfoMap[NewU] == OffsetInfoMap[OldU]; + } OffsetInfoMap[NewU] = OffsetInfoMap[OldU]; return true; }; if (!A.checkForAllUses(UsePred, *this, AssociatedValue, /* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL, - /* IgnoreDroppableUses */ true, EquivalentUseCB)) + /* IgnoreDroppableUses */ true, EquivalentUseCB)) { + LLVM_DEBUG( + dbgs() << "[AAPointerInfo] Check for all uses failed, abort!\n"); return indicatePessimisticFixpoint(); + } LLVM_DEBUG({ dbgs() << "Accesses by bin after update:\n"; - for (auto &It : AccessBins) { - dbgs() << "[" << It.first.getOffset() << "-" - << It.first.getOffset() + It.first.getSize() - << "] : " << It.getSecond()->size() << "\n"; - for (auto &Acc : *It.getSecond()) { - dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() - << "\n"; - if (Acc.getLocalInst() != Acc.getRemoteInst()) - dbgs() << " --> " - << *Acc.getRemoteInst() << "\n"; - if (!Acc.isWrittenValueYetUndetermined()) { - if (Acc.getWrittenValue()) - dbgs() << " - c: " << *Acc.getWrittenValue() << "\n"; - else - dbgs() << " - c: <unknown>\n"; - } - } - } + dumpState(dbgs()); }); return Changed; @@ -1643,16 +1497,22 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { unsigned ArgNo = getIRPosition().getCallSiteArgNo(); ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, - nullptr, LengthVal); + handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_MUST_WRITE, 0, + Changed, nullptr, LengthVal); } else if (ArgNo == 1) { - handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed, + handleAccess(A, *MI, Ptr, nullptr, 
AccessKind::AK_MUST_READ, 0, Changed, nullptr, LengthVal); } else { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); return indicatePessimisticFixpoint(); } + + LLVM_DEBUG({ + dbgs() << "Accesses by bin after update:\n"; + dumpState(dbgs()); + }); + return Changed; } @@ -1954,23 +1814,23 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts( ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { ChangeStatus Changed = ChangeStatus::UNCHANGED; - auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret, - bool) -> bool { - assert(AA::isValidInScope(V, Ret.getFunction()) && - "Assumed returned value should be valid in function scope!"); - if (ReturnedValues[&V].insert(&Ret)) - Changed = ChangeStatus::CHANGED; - return true; - }; - + SmallVector<AA::ValueAndContext> Values; bool UsedAssumedInformation = false; auto ReturnInstCB = [&](Instruction &I) { ReturnInst &Ret = cast<ReturnInst>(I); - return genericValueTraversal<ReturnInst>( - A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB, - &I, UsedAssumedInformation, /* UseValueSimplify */ true, - /* MaxValues */ 16, - /* StripCB */ nullptr, AA::Intraprocedural); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*Ret.getReturnValue()), + *this, Values, AA::Intraprocedural, + UsedAssumedInformation)) + Values.push_back({*Ret.getReturnValue(), Ret}); + + for (auto &VAC : Values) { + assert(AA::isValidInScope(*VAC.getValue(), Ret.getFunction()) && + "Assumed returned value should be valid in function scope!"); + if (ReturnedValues[VAC.getValue()].insert(&Ret)) + Changed = ChangeStatus::CHANGED; + } + return true; }; // Discover returned values from all live returned instructions in the @@ -2472,6 +2332,18 @@ struct AANonNullFloating : public AANonNullImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); + bool Stripped; + bool UsedAssumedInformation = false; + 
SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; InformationCache &InfoCache = A.getInfoCache(); @@ -2480,8 +2352,8 @@ struct AANonNullFloating : public AANonNullImpl { AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn); } - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANonNull::StateType &T, bool Stripped) -> bool { + AANonNull::StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -2495,12 +2367,9 @@ struct AANonNullFloating : public AANonNullImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -2753,8 +2622,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { if (!NoUndefAA.isKnownNoUndef()) continue; bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedVal = A.getAssumedSimplified( - IRPosition::value(*ArgVal), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedVal = + A.getAssumedSimplified(IRPosition::value(*ArgVal), *this, + UsedAssumedInformation, AA::Interprocedural); if (UsedAssumedInformation) continue; if (SimplifiedVal && !SimplifiedVal.value()) @@ -2925,8 +2795,9 @@ private: Optional<Value *> 
stopOnUndefOrAssumed(Attributor &A, Value *V, Instruction *I) { bool UsedAssumedInformation = false; - Optional<Value *> SimplifiedV = A.getAssumedSimplified( - IRPosition::value(*V), *this, UsedAssumedInformation); + Optional<Value *> SimplifiedV = + A.getAssumedSimplified(IRPosition::value(*V), *this, + UsedAssumedInformation, AA::Interprocedural); if (!UsedAssumedInformation) { // Don't depend on assumed values. if (!SimplifiedV) { @@ -3369,7 +3240,9 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { } } - if (!AA::isPotentiallyReachable(A, *UserI, *getCtxI(), *this)) + if (!AA::isPotentiallyReachable( + A, *UserI, *getCtxI(), *this, + [ScopeFn](const Function &Fn) { return &Fn != ScopeFn; })) return true; } @@ -4364,10 +4237,23 @@ struct AADereferenceableFloating : AADereferenceableImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { + + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + const DataLayout &DL = A.getDataLayout(); + DerefState T; - auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T, - bool Stripped) -> bool { + auto VisitValueCB = [&](const Value &V) -> bool { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); @@ -4420,12 +4306,9 @@ struct AADereferenceableFloating : AADereferenceableImpl { return T.isValidState(); }; - DerefState T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if 
(!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -4652,8 +4535,20 @@ struct AAAlignFloating : AAAlignImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); - auto VisitValueCB = [&](Value &V, const Instruction *, - AAAlign::StateType &T, bool Stripped) -> bool { + bool Stripped; + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + Stripped = false; + } else { + Stripped = Values.size() != 1 || + Values.front().getValue() != &getAssociatedValue(); + } + + StateType T; + auto VisitValueCB = [&](Value &V) -> bool { if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V)) return true; const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V), @@ -4686,15 +4581,13 @@ struct AAAlignFloating : AAAlignImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return indicatePessimisticFixpoint(); + for (const auto &VAC : Values) { + if (!VisitValueCB(*VAC.getValue())) + return indicatePessimisticFixpoint(); + } - // TODO: If we know we visited all incoming values, thus no are assumed - // dead, we can take the known information from the state T. + // TODO: If we know we visited all incoming values, thus no are assumed + // dead, we can take the known information from the state T. return clampStateAndIndicateChange(getState(), T); } @@ -4941,7 +4834,9 @@ struct AAInstanceInfoImpl : public AAInstanceInfo { return false; // If this call base might reach the scope again we might forward the // argument back here. This is very conservative. 
- if (AA::isPotentiallyReachable(A, *CB, *Scope, *this, nullptr)) + if (AA::isPotentiallyReachable( + A, *CB, *Scope, *this, + [Scope](const Function &Fn) { return &Fn != Scope; })) return false; return true; } @@ -5518,9 +5413,9 @@ struct AAValueSimplifyImpl : AAValueSimplify { if (const auto &NewV = VMap.lookup(&V)) return NewV; bool UsedAssumedInformation = false; - Optional<Value *> SimpleV = - A.getAssumedSimplified(V, QueryingAA, UsedAssumedInformation); - if (!SimpleV) + Optional<Value *> SimpleV = A.getAssumedSimplified( + V, QueryingAA, UsedAssumedInformation, AA::Interprocedural); + if (!SimpleV.has_value()) return PoisonValue::get(&Ty); Value *EffectiveV = &V; if (SimpleV.value()) @@ -5561,8 +5456,8 @@ struct AAValueSimplifyImpl : AAValueSimplify { bool UsedAssumedInformation = false; Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue(); if (Simplify) - QueryingValueSimplified = - A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation); + QueryingValueSimplified = A.getAssumedSimplified( + IRP, QueryingAA, UsedAssumedInformation, AA::Interprocedural); return unionAssumed(QueryingValueSimplified); } @@ -5763,209 +5658,11 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl { indicatePessimisticFixpoint(); } - /// Check if \p Cmp is a comparison we can simplify. - /// - /// We handle multiple cases, one in which at least one operand is an - /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other - /// operand. Return true if successful, in that case SimplifiedAssociatedValue - /// will be updated. - bool handleCmp(Attributor &A, CmpInst &Cmp) { - auto Union = [&](Value &V) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, &V, V.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - }; - - Value *LHS = Cmp.getOperand(0); - Value *RHS = Cmp.getOperand(1); - - // Simplify the operands first. 
- bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return true; - if (!SimplifiedLHS.value()) - return false; - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return true; - if (!SimplifiedRHS.value()) - return false; - RHS = *SimplifiedRHS; - - LLVMContext &Ctx = Cmp.getContext(); - // Handle the trivial case first in which we don't even need to think about - // null or non-null. - if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { - Constant *NewVal = - ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); - if (!Union(*NewVal)) - return false; - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - return true; - } - - // From now on we only handle equalities (==, !=). - ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); - if (!ICmp || !ICmp->isEquality()) - return false; - - bool LHSIsNull = isa<ConstantPointerNull>(LHS); - bool RHSIsNull = isa<ConstantPointerNull>(RHS); - if (!LHSIsNull && !RHSIsNull) - return false; - - // Left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the - // non-nullptr operand and if we assume it's non-null we can conclude the - // result of the comparison. - assert((LHSIsNull || RHSIsNull) && - "Expected nullptr versus non-nullptr comparison at this point"); - - // The index is the operand that we assume is not null. - unsigned PtrIdx = LHSIsNull; - auto &PtrNonNullAA = A.getAAFor<AANonNull>( - *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), - DepClassTy::REQUIRED); - if (!PtrNonNullAA.isAssumedNonNull()) - return false; - UsedAssumedInformation |= !PtrNonNullAA.isKnownNonNull(); - - // The new value depends on the predicate, true for != and false for ==. 
- Constant *NewVal = ConstantInt::get( - Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_NE); - if (!Union(*NewVal)) - return false; - - if (!UsedAssumedInformation) - indicateOptimisticFixpoint(); - - return true; - } - - /// Use the generic, non-optimistic InstSimplfy functionality if we managed to - /// simplify any operand of the instruction \p I. Return true if successful, - /// in that case SimplifiedAssociatedValue will be updated. - bool handleGenericInst(Attributor &A, Instruction &I) { - bool SomeSimplified = false; - bool UsedAssumedInformation = false; - - SmallVector<Value *, 8> NewOps(I.getNumOperands()); - int Idx = 0; - for (Value *Op : I.operands()) { - const auto &SimplifiedOp = - A.getAssumedSimplified(IRPosition::value(*Op, getCallBaseContext()), - *this, UsedAssumedInformation); - // If we are not sure about any operand we are not sure about the entire - // instruction, we'll wait. - if (!SimplifiedOp) - return true; - - if (SimplifiedOp.value()) - NewOps[Idx] = SimplifiedOp.value(); - else - NewOps[Idx] = Op; - - SomeSimplified |= (NewOps[Idx] != Op); - ++Idx; - } - - // We won't bother with the InstSimplify interface if we didn't simplify any - // operand ourselves. 
- if (!SomeSimplified) - return false; - - InformationCache &InfoCache = A.getInfoCache(); - Function *F = I.getFunction(); - const auto *DT = - InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); - const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); - auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); - OptimizationRemarkEmitter *ORE = nullptr; - - const DataLayout &DL = I.getModule()->getDataLayout(); - SimplifyQuery Q(DL, TLI, DT, AC, &I); - if (Value *SimplifiedI = - simplifyInstructionWithOperands(&I, NewOps, Q, ORE)) { - SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice( - SimplifiedAssociatedValue, SimplifiedI, I.getType()); - return SimplifiedAssociatedValue != Optional<Value *>(nullptr); - } - return false; - } - /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto Before = SimplifiedAssociatedValue; - - // Do not simplify loads that are only used in llvm.assume if we cannot also - // remove all stores that may feed into the load. The reason is that the - // assume is probably worth something as long as the stores are around. 
- if (auto *LI = dyn_cast<LoadInst>(&getAssociatedValue())) { - InformationCache &InfoCache = A.getInfoCache(); - if (InfoCache.isOnlyUsedByAssume(*LI)) { - SmallSetVector<Value *, 4> PotentialCopies; - SmallSetVector<Instruction *, 4> PotentialValueOrigins; - bool UsedAssumedInformation = false; - if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, - PotentialValueOrigins, *this, - UsedAssumedInformation, - /* OnlyExact */ true)) { - if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { - if (!I) - return true; - if (auto *SI = dyn_cast<StoreInst>(I)) - return A.isAssumedDead(SI->getOperandUse(0), this, - /* LivenessAA */ nullptr, - UsedAssumedInformation, - /* CheckBBLivenessOnly */ false); - return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, - UsedAssumedInformation, - /* CheckBBLivenessOnly */ false); - })) - return indicatePessimisticFixpoint(); - } - } - } - - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &, - bool Stripped) -> bool { - auto &AA = A.getAAFor<AAValueSimplify>( - *this, IRPosition::value(V, getCallBaseContext()), - DepClassTy::REQUIRED); - if (!Stripped && this == &AA) { - - if (auto *I = dyn_cast<Instruction>(&V)) { - if (auto *Cmp = dyn_cast<CmpInst>(&V)) - if (handleCmp(A, *Cmp)) - return true; - if (handleGenericInst(A, *I)) - return true; - } - // TODO: Look the instruction and check recursively. 
- - LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V - << "\n"); - return false; - } - return checkAndUpdate(A, *this, - IRPosition::value(V, getCallBaseContext())); - }; - - bool Dummy = false; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy, - VisitValueCB, getCtxI(), - UsedAssumedInformation, - /* UseValueSimplify */ false)) - if (!askSimplifiedValueForOtherAAs(A)) - return indicatePessimisticFixpoint(); + if (!askSimplifiedValueForOtherAAs(A)) + return indicatePessimisticFixpoint(); // If a candicate was found in this update, return CHANGED. return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED @@ -6122,6 +5819,8 @@ struct AAHeapToStackFunction final : public AAHeapToStack { struct DeallocationInfo { /// The call that deallocates the memory. CallBase *const CB; + /// The value freed by the call. + Value *FreedOp; /// Flag to indicate if we don't know all objects this deallocation might /// free. @@ -6153,14 +5852,14 @@ struct AAHeapToStackFunction final : public AAHeapToStack { CallBase *CB = dyn_cast<CallBase>(&I); if (!CB) return true; - if (isFreeCall(CB, TLI)) { - DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB}; + if (Value *FreedOp = getFreedOperand(CB, TLI)) { + DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB, FreedOp}; return true; } // To do heap to stack, we need to know that the allocation itself is // removable once uses are rewritten, and that we can initialize the // alloca to the same pattern as the original allocation result. 
- if (isAllocationFn(CB, TLI) && isAllocRemovable(CB, TLI)) { + if (isRemovableAlloc(CB, TLI)) { auto *I8Ty = Type::getInt8Ty(CB->getParent()->getContext()); if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) { AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB}; @@ -6427,44 +6126,36 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { /* CheckBBLivenessOnly */ true)) continue; - // Use the optimistic version to get the freed objects, ignoring dead - // branches etc. - SmallVector<Value *, 8> Objects; - if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects, - *this, DI.CB, - UsedAssumedInformation)) { - LLVM_DEBUG( - dbgs() - << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n"); + // Use the non-optimistic version to get the freed object. + Value *Obj = getUnderlyingObject(DI.FreedOp); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[H2S] Unknown underlying object for free!\n"); DI.MightFreeUnknownObjects = true; continue; } - // Check each object explicitly. - for (auto *Obj : Objects) { - // Free of null and undef can be ignored as no-ops (or UB in the latter - // case). - if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) - continue; - - CallBase *ObjCB = dyn_cast<CallBase>(Obj); - if (!ObjCB) { - LLVM_DEBUG(dbgs() - << "[H2S] Free of a non-call object: " << *Obj << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + // Free of null and undef can be ignored as no-ops (or UB in the latter + // case). 
+ if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj)) + continue; - AllocationInfo *AI = AllocationInfos.lookup(ObjCB); - if (!AI) { - LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj - << "\n"); - DI.MightFreeUnknownObjects = true; - continue; - } + CallBase *ObjCB = dyn_cast<CallBase>(Obj); + if (!ObjCB) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-call object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; + } - DI.PotentialAllocationCalls.insert(ObjCB); + AllocationInfo *AI = AllocationInfos.lookup(ObjCB); + if (!AI) { + LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj + << "\n"); + DI.MightFreeUnknownObjects = true; + continue; } + + DI.PotentialAllocationCalls.insert(ObjCB); } }; @@ -7692,7 +7383,7 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U, const Instruction *UserI) { // The loaded value is unrelated to the pointer argument, no need to // follow the users of the load. - if (isa<LoadInst>(UserI)) + if (isa<LoadInst>(UserI) || isa<ReturnInst>(UserI)) return false; // By default we follow all uses assuming UserI might leak information on U, @@ -7822,16 +7513,15 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) : AAMemoryLocation(IRP, A), Allocator(A.Allocator) { - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - AccessKind2Accesses[u] = nullptr; + AccessKind2Accesses.fill(nullptr); } ~AAMemoryLocationImpl() { // The AccessSets are allocated via a BumpPtrAllocator, we call // the destructor manually. - for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u) - if (AccessKind2Accesses[u]) - AccessKind2Accesses[u]->~AccessSet(); + for (AccessSet *AS : AccessKind2Accesses) + if (AS) + AS->~AccessSet(); } /// See AbstractAttribute::initialize(...). 
@@ -7999,7 +7689,7 @@ protected: /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind. using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>; - AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()]; + std::array<AccessSet *, llvm::CTLog2<VALID_STATE>()> AccessKind2Accesses; /// Categorize the pointer arguments of CB that might access memory in /// AccessedLoc and update the state and access map accordingly. @@ -8061,7 +7751,7 @@ void AAMemoryLocationImpl::categorizePtrValue( << Ptr << " [" << getMemoryLocationsAsStr(State.getAssumed()) << "]\n"); - SmallVector<Value *, 8> Objects; + SmallSetVector<Value *, 8> Objects; bool UsedAssumedInformation = false; if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I, UsedAssumedInformation, @@ -8670,19 +8360,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8723,10 +8413,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operand first. 
bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(*OpV, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8753,19 +8443,19 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { // Simplify the operands first. bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedLHS.has_value()) return true; if (!SimplifiedLHS.value()) return false; LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedRHS.has_value()) return true; if (!SimplifiedRHS.value()) return false; @@ -8820,17 +8510,18 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - IntegerRangeState &T, bool Stripped) -> bool { + + IntegerRangeState T(getBitWidth()); + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { Instruction *I = dyn_cast<Instruction>(&V); if (!I || isa<CallBase>(I)) { // Simplify the operand first. 
bool UsedAssumedInformation = false; - const auto &SimplifiedOpV = - A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedOpV) + const auto &SimplifiedOpV = A.getAssumedSimplified( + IRPosition::value(V, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Interprocedural); + if (!SimplifiedOpV.has_value()) return true; if (!SimplifiedOpV.value()) return false; @@ -8880,13 +8571,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl { return T.isValidState(); }; - IntegerRangeState T(getBitWidth()); - - bool UsedAssumedInformation = false; - if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation, - /* UseValueSimplify */ false)) + if (!VisitValueCB(getAssociatedValue(), getCtxI())) return indicatePessimisticFixpoint(); // Ensure that long def-use chains can't cause circular reasoning either by @@ -8998,6 +8683,36 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues { AAPotentialConstantValues::initialize(A); } + bool fillSetWithConstantValues(Attributor &A, const IRPosition &IRP, SetTy &S, + bool &ContainsUndef) { + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(IRP, *this, Values, AA::Interprocedural, + UsedAssumedInformation)) { + if (!IRP.getAssociatedType()->isIntegerTy()) + return false; + auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( + *this, IRP, DepClassTy::REQUIRED); + if (!PotentialValuesAA.getState().isValidState()) + return false; + ContainsUndef = PotentialValuesAA.getState().undefIsContained(); + S = PotentialValuesAA.getState().getAssumedSet(); + return true; + } + + for (auto &It : Values) { + if (isa<UndefValue>(It.getValue())) + continue; + auto *CI = dyn_cast<ConstantInt>(It.getValue()); + if (!CI) + return false; + S.insert(CI->getValue()); + } + ContainsUndef = S.empty(); + + return true; 
+ } + /// See AbstractAttribute::getAsStr(). const std::string getAsStr() const override { std::string Str; @@ -9186,50 +8901,22 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - // Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); - // TODO: make use of undef flag to limit potential values aggressively. 
bool MaybeTrue = false, MaybeFalse = false; const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0); - if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { // The result of any comparison between undefs can be soundly replaced // with undef. unionAssumedWithUndef(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, Zero, R); MaybeTrue |= CmpResult; @@ -9237,7 +8924,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (MaybeTrue & MaybeFalse) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { bool CmpResult = calculateICmpInst(ICI, L, Zero); MaybeTrue |= CmpResult; @@ -9269,29 +8956,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = SI->getTrueValue(); Value *RHS = SI->getFalseValue(); - // Simplify the operands first. 
bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) - return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - Optional<Constant *> C = A.getAssumedConstant(*SI->getCondition(), *this, UsedAssumedInformation); @@ -9302,35 +8967,36 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { else if (C && *C && (*C)->isZeroValue()) OnlyRight = true; - const AAPotentialConstantValues *LHSAA = nullptr, *RHSAA = nullptr; - if (!OnlyRight) { - LHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } - if (!OnlyLeft) { - RHSAA = &A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA->isValidState()) - return indicatePessimisticFixpoint(); - } + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!OnlyRight && !fillSetWithConstantValues(A, IRPosition::value(*LHS), + LHSAAPVS, LHSContainsUndef)) + return indicatePessimisticFixpoint(); + + if (!OnlyLeft && !fillSetWithConstantValues(A, IRPosition::value(*RHS), + RHSAAPVS, RHSContainsUndef)) + return indicatePessimisticFixpoint(); - if (!LHSAA || !RHSAA) { + if (OnlyLeft || OnlyRight) { // select (true/false), lhs, rhs - auto *OpAA = LHSAA ? LHSAA : RHSAA; + auto *OpAA = OnlyLeft ? 
&LHSAAPVS : &RHSAAPVS; + auto Undef = OnlyLeft ? LHSContainsUndef : RHSContainsUndef; - if (OpAA->undefIsContained()) + if (Undef) unionAssumedWithUndef(); - else - unionAssumed(*OpAA); + else { + for (auto &It : *OpAA) + unionAssumed(It); + } - } else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) { + } else if (LHSContainsUndef && RHSContainsUndef) { // select i1 *, undef , undef => undef unionAssumedWithUndef(); } else { - unionAssumed(*LHSAA); - unionAssumed(*RHSAA); + for (auto &It : LHSAAPVS) + unionAssumed(It); + for (auto &It : RHSAAPVS) + unionAssumed(It); } return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; @@ -9344,26 +9010,16 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth(); Value *Src = CI->getOperand(0); - // Simplify the operand first. - bool UsedAssumedInformation = false; - const auto &SimplifiedSrc = - A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedSrc) - return ChangeStatus::UNCHANGED; - if (!SimplifiedSrc.value()) + bool SrcContainsUndef = false; + SetTy SrcPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*Src), SrcPVS, + SrcContainsUndef)) return indicatePessimisticFixpoint(); - Src = *SimplifiedSrc; - auto &SrcAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*Src), DepClassTy::REQUIRED); - if (!SrcAA.isValidState()) - return indicatePessimisticFixpoint(); - const SetTy &SrcAAPVS = SrcAA.getAssumedSet(); - if (SrcAA.undefIsContained()) + if (SrcContainsUndef) unionAssumedWithUndef(); else { - for (const APInt &S : SrcAAPVS) { + for (const APInt &S : SrcPVS) { APInt T = calculateCastInst(CI, S, ResultBitWidth); unionAssumed(T); } @@ -9377,53 +9033,26 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); - // 
Simplify the operands first. - bool UsedAssumedInformation = false; - const auto &SimplifiedLHS = - A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedLHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedLHS.value()) + bool LHSContainsUndef = false, RHSContainsUndef = false; + SetTy LHSAAPVS, RHSAAPVS; + if (!fillSetWithConstantValues(A, IRPosition::value(*LHS), LHSAAPVS, + LHSContainsUndef) || + !fillSetWithConstantValues(A, IRPosition::value(*RHS), RHSAAPVS, + RHSContainsUndef)) return indicatePessimisticFixpoint(); - LHS = *SimplifiedLHS; - const auto &SimplifiedRHS = - A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()), - *this, UsedAssumedInformation); - if (!SimplifiedRHS) - return ChangeStatus::UNCHANGED; - if (!SimplifiedRHS.value()) - return indicatePessimisticFixpoint(); - RHS = *SimplifiedRHS; - - if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) - return indicatePessimisticFixpoint(); - - auto &LHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*LHS), DepClassTy::REQUIRED); - if (!LHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - auto &RHSAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*RHS), DepClassTy::REQUIRED); - if (!RHSAA.isValidState()) - return indicatePessimisticFixpoint(); - - const SetTy &LHSAAPVS = LHSAA.getAssumedSet(); - const SetTy &RHSAAPVS = RHSAA.getAssumedSet(); const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0); // TODO: make use of undef flag to limit potential values aggressively. 
- if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) { + if (LHSContainsUndef && RHSContainsUndef) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero)) return indicatePessimisticFixpoint(); - } else if (LHSAA.undefIsContained()) { + } else if (LHSContainsUndef) { for (const APInt &R : RHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R)) return indicatePessimisticFixpoint(); } - } else if (RHSAA.undefIsContained()) { + } else if (RHSContainsUndef) { for (const APInt &L : LHSAAPVS) { if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero)) return indicatePessimisticFixpoint(); @@ -9440,35 +9069,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { : ChangeStatus::CHANGED; } - ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) { - auto AssumedBefore = getAssumed(); - for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { - Value *IncomingValue = PHI->getIncomingValue(u); - - // Simplify the operand first. - bool UsedAssumedInformation = false; - const auto &SimplifiedIncomingValue = A.getAssumedSimplified( - IRPosition::value(*IncomingValue, getCallBaseContext()), *this, - UsedAssumedInformation); - if (!SimplifiedIncomingValue) - continue; - if (!SimplifiedIncomingValue.value()) - return indicatePessimisticFixpoint(); - IncomingValue = *SimplifiedIncomingValue; - - auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>( - *this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED); - if (!PotentialValuesAA.isValidState()) - return indicatePessimisticFixpoint(); - if (PotentialValuesAA.undefIsContained()) - unionAssumedWithUndef(); - else - unionAssumed(PotentialValuesAA.getAssumed()); - } - return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED - : ChangeStatus::CHANGED; - } - /// See AbstractAttribute::updateImpl(...). 
ChangeStatus updateImpl(Attributor &A) override { Value &V = getAssociatedValue(); @@ -9486,9 +9086,6 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl { if (auto *BinOp = dyn_cast<BinaryOperator>(I)) return updateWithBinaryOperator(A, BinOp); - if (auto *PHI = dyn_cast<PHINode>(I)) - return updateWithPHINode(A, PHI); - return indicatePessimisticFixpoint(); } @@ -9642,7 +9239,8 @@ struct AANoUndefImpl : AANoUndef { // A position whose simplified value does not have any value is // considered to be dead. We don't manifest noundef in such positions for // the same reason above. - if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation) + if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation, + AA::Interprocedural) .has_value()) return ChangeStatus::UNCHANGED; return AANoUndef::manifest(A); @@ -9663,11 +9261,19 @@ struct AANoUndefFloating : public AANoUndefImpl { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { - auto VisitValueCB = [&](Value &V, const Instruction *CtxI, - AANoUndef::StateType &T, bool Stripped) -> bool { + + SmallVector<AA::ValueAndContext> Values; + bool UsedAssumedInformation = false; + if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({getAssociatedValue(), getCtxI()}); + } + + StateType T; + auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool { const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V), DepClassTy::REQUIRED); - if (!Stripped && this == &AA) { + if (this == &AA) { T.indicatePessimisticFixpoint(); } else { const AANoUndef::StateType &S = @@ -9677,12 +9283,9 @@ struct AANoUndefFloating : public AANoUndefImpl { return T.isValidState(); }; - StateType T; - bool UsedAssumedInformation = false; - if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T, - VisitValueCB, getCtxI(), - UsedAssumedInformation)) - return 
indicatePessimisticFixpoint(); + for (const auto &VAC : Values) + if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI())) + return indicatePessimisticFixpoint(); return clampStateAndIndicateChange(getState(), T); } @@ -9782,8 +9385,7 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Change = ChangeStatus::UNCHANGED; - auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown, - bool Stripped) -> bool { + auto VisitValue = [&](Value &V, const Instruction *CtxI) -> bool { if (Function *Fn = dyn_cast<Function>(&V)) { addCalledFunction(Fn, Change); } else { @@ -9795,17 +9397,17 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { return true; }; + SmallVector<AA::ValueAndContext> Values; // Process any value that we might call. - auto ProcessCalledOperand = [&](Value *V) { - bool DummyValue = false; + auto ProcessCalledOperand = [&](Value *V, Instruction *CtxI) { bool UsedAssumedInformation = false; - if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this, - DummyValue, VisitValue, nullptr, - UsedAssumedInformation, false)) { - // If we haven't gone through all values, assume that there are unknown - // callees. - setHasUnknownCallee(true, Change); + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::value(*V), *this, Values, + AA::AnyScope, UsedAssumedInformation)) { + Values.push_back({*V, CtxI}); } + for (auto &VAC : Values) + VisitValue(*VAC.getValue(), VAC.getCtxI()); }; CallBase *CB = cast<CallBase>(getCtxI()); @@ -9828,13 +9430,13 @@ struct AACallEdgesCallSite : public AACallEdgesImpl { } // The most simple case. - ProcessCalledOperand(CB->getCalledOperand()); + ProcessCalledOperand(CB->getCalledOperand(), CB); // Process callback functions. 
SmallVector<const Use *, 4u> CallbackUses; AbstractCallSite::getCallbackUses(*CB, CallbackUses); for (const Use *U : CallbackUses) - ProcessCalledOperand(U->get()); + ProcessCalledOperand(U->get(), CB); return Change; } @@ -9920,8 +9522,11 @@ private: for (auto *AAEdges : AAEdgesList) { if (AAEdges->hasUnknownCallee()) { - if (!CanReachUnknownCallee) + if (!CanReachUnknownCallee) { + LLVM_DEBUG(dbgs() + << "[QueryResolver] Edges include unknown callee!\n"); Change = ChangeStatus::CHANGED; + } CanReachUnknownCallee = true; return Change; } @@ -10065,14 +9670,10 @@ public: } bool instructionCanReach(Attributor &A, const Instruction &Inst, - const Function &Fn, - bool UseBackwards) const override { + const Function &Fn) const override { if (!isValidState()) return true; - if (UseBackwards) - return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr); - const auto &Reachability = A.getAAFor<AAReachability>( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); @@ -10085,8 +9686,11 @@ public: // This is a hack for us to be able to cache queries. auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this); QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst]; - if (!AllKnown) + if (!AllKnown) { + LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges known, " + "may reach unknown callee!\n"); InstQSet.CanReachUnknownCallee = true; + } return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); } @@ -10119,8 +9723,11 @@ public: bool AllKnown = getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges); // Update will return change if we this effects any queries. 
- if (!AllKnown) + if (!AllKnown) { + LLVM_DEBUG(dbgs() << "[AAReachability] Not all reachable edges " + "known, may reach unknown callee!\n"); InstPair.second.CanReachUnknownCallee = true; + } Change |= InstPair.second.update(A, *this, CallEdges); } } @@ -10133,8 +9740,11 @@ public: WholeFunction.Reachable.size() + WholeFunction.Unreachable.size(); return "FunctionReachability [" + - std::to_string(WholeFunction.Reachable.size()) + "," + - std::to_string(QueryCount) + "]"; + (canReachUnknownCallee() + ? "unknown" + : (std::to_string(WholeFunction.Reachable.size()) + "," + + std::to_string(QueryCount))) + + "]"; } void trackStatistics() const override {} @@ -10156,6 +9766,822 @@ private: }; } // namespace +template <typename AAType> +static Optional<Constant *> +askForAssumedConstant(Attributor &A, const AbstractAttribute &QueryingAA, + const IRPosition &IRP, Type &Ty) { + if (!Ty.isIntegerTy()) + return nullptr; + + // This will also pass the call base context. + const auto &AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE); + + Optional<Constant *> COpt = AA.getAssumedConstant(A); + + if (!COpt.has_value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return llvm::None; + } + if (auto *C = COpt.value()) { + A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL); + return C; + } + return nullptr; +} + +Value *AAPotentialValues::getSingleValue( + Attributor &A, const AbstractAttribute &AA, const IRPosition &IRP, + SmallVectorImpl<AA::ValueAndContext> &Values) { + Type &Ty = *IRP.getAssociatedType(); + Optional<Value *> V; + for (auto &It : Values) { + V = AA::combineOptionalValuesInAAValueLatice(V, It.getValue(), &Ty); + if (V.has_value() && !V.value()) + break; + } + if (!V.has_value()) + return UndefValue::get(&Ty); + return V.value(); +} + +namespace { +struct AAPotentialValuesImpl : AAPotentialValues { + using StateType = PotentialLLVMValuesState; + + AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) + : 
AAPotentialValues(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) { + indicatePessimisticFixpoint(); + return; + } + Value *Stripped = getAssociatedValue().stripPointerCasts(); + if (isa<Constant>(Stripped)) { + addValue(A, getState(), *Stripped, getCtxI(), AA::AnyScope, + getAnchorScope()); + indicateOptimisticFixpoint(); + return; + } + AAPotentialValues::initialize(A); + } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + std::string Str; + llvm::raw_string_ostream OS(Str); + OS << getState(); + return OS.str(); + } + + template <typename AAType> + static Optional<Value *> askOtherAA(Attributor &A, + const AbstractAttribute &AA, + const IRPosition &IRP, Type &Ty) { + if (isa<Constant>(IRP.getAssociatedValue())) + return &IRP.getAssociatedValue(); + Optional<Constant *> C = askForAssumedConstant<AAType>(A, AA, IRP, Ty); + if (!C) + return llvm::None; + if (C.value()) + if (auto *CC = AA::getWithType(**C, Ty)) + return CC; + return nullptr; + } + + void addValue(Attributor &A, StateType &State, Value &V, + const Instruction *CtxI, AA::ValueScope S, + Function *AnchorScope) const { + + IRPosition ValIRP = IRPosition::value(V); + if (auto *CB = dyn_cast_or_null<CallBase>(CtxI)) { + for (auto &U : CB->args()) { + if (U.get() != &V) + continue; + ValIRP = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); + break; + } + } + + Value *VPtr = &V; + if (ValIRP.getAssociatedType()->isIntegerTy()) { + Type &Ty = *getAssociatedType(); + Optional<Value *> SimpleV = + askOtherAA<AAValueConstantRange>(A, *this, ValIRP, Ty); + if (SimpleV.has_value() && !SimpleV.value()) { + auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>( + *this, ValIRP, DepClassTy::OPTIONAL); + if (PotentialConstantsAA.isValidState()) { + for (auto &It : PotentialConstantsAA.getAssumedSet()) { + 
State.unionAssumed({{*ConstantInt::get(&Ty, It), nullptr}, S}); + } + assert(!PotentialConstantsAA.undefIsContained() && + "Undef should be an explicit value!"); + return; + } + } + if (!SimpleV.has_value()) + return; + + if (SimpleV.value()) + VPtr = SimpleV.value(); + } + + if (isa<ConstantInt>(VPtr)) + CtxI = nullptr; + if (!AA::isValidInScope(*VPtr, AnchorScope)) + S = AA::ValueScope(S | AA::Interprocedural); + + State.unionAssumed({{*VPtr, CtxI}, S}); + } + + /// Helper struct to tie a value+context pair together with the scope for + /// which this is the simplified version. + struct ItemInfo { + AA::ValueAndContext I; + AA::ValueScope S; + }; + + bool recurseForValue(Attributor &A, const IRPosition &IRP, AA::ValueScope S) { + SmallMapVector<AA::ValueAndContext, int, 8> ValueScopeMap; + for (auto CS : {AA::Intraprocedural, AA::Interprocedural}) { + if (!(CS & S)) + continue; + + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRP, this, Values, CS, + UsedAssumedInformation)) + return false; + + for (auto &It : Values) + ValueScopeMap[It] += CS; + } + for (auto &It : ValueScopeMap) + addValue(A, getState(), *It.first.getValue(), It.first.getCtxI(), + AA::ValueScope(It.second), getAnchorScope()); + + return true; + } + + void giveUpOnIntraprocedural(Attributor &A) { + auto NewS = StateType::getBestState(getState()); + for (auto &It : getAssumedSet()) { + if (It.second == AA::Intraprocedural) + continue; + addValue(A, NewS, *It.first.getValue(), It.first.getCtxI(), + AA::Interprocedural, getAnchorScope()); + } + assert(!undefIsContained() && "Undef should be an explicit value!"); + addValue(A, NewS, getAssociatedValue(), getCtxI(), AA::Intraprocedural, + getAnchorScope()); + getState() = NewS; + } + + /// See AbstractState::indicatePessimisticFixpoint(...). 
+ ChangeStatus indicatePessimisticFixpoint() override { + getState() = StateType::getBestState(getState()); + getState().unionAssumed({{getAssociatedValue(), getCtxI()}, AA::AnyScope}); + AAPotentialValues::indicateOptimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + return indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + SmallVector<AA::ValueAndContext> Values; + for (AA::ValueScope S : {AA::Interprocedural, AA::Intraprocedural}) { + Values.clear(); + if (!getAssumedSimplifiedValues(A, Values, S)) + continue; + Value &OldV = getAssociatedValue(); + if (isa<UndefValue>(OldV)) + continue; + Value *NewV = getSingleValue(A, *this, getIRPosition(), Values); + if (!NewV || NewV == &OldV) + continue; + if (getCtxI() && + !AA::isValidAtPosition({*NewV, *getCtxI()}, A.getInfoCache())) + continue; + if (A.changeAfterManifest(getIRPosition(), *NewV)) + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; + } + + bool getAssumedSimplifiedValues(Attributor &A, + SmallVectorImpl<AA::ValueAndContext> &Values, + AA::ValueScope S) const override { + if (!isValidState()) + return false; + for (auto &It : getAssumedSet()) + if (It.second & S) + Values.push_back(It.first); + assert(!undefIsContained() && "Undef should be an explicit value!"); + return true; + } +}; + +struct AAPotentialValuesFloating : AAPotentialValuesImpl { + AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + genericValueTraversal(A); + + return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// Helper struct to remember which AAIsDead instances we actually used. 
+ struct LivenessInfo { + const AAIsDead *LivenessAA = nullptr; + bool AnyDead = false; + }; + + /// Check if \p Cmp is a comparison we can simplify. + /// + /// We handle multiple cases, one in which at least one operand is an + /// (assumed) nullptr. If so, try to simplify it using AANonNull on the other + /// operand. Return true if successful, in that case Worklist will be updated. + bool handleCmp(Attributor &A, CmpInst &Cmp, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + Value *LHS = Cmp.getOperand(0); + Value *RHS = Cmp.getOperand(1); + + // Simplify the operands first. + bool UsedAssumedInformation = false; + const auto &SimplifiedLHS = A.getAssumedSimplified( + IRPosition::value(*LHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedLHS.has_value()) + return true; + if (!SimplifiedLHS.value()) + return false; + LHS = *SimplifiedLHS; + + const auto &SimplifiedRHS = A.getAssumedSimplified( + IRPosition::value(*RHS, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + if (!SimplifiedRHS.has_value()) + return true; + if (!SimplifiedRHS.value()) + return false; + RHS = *SimplifiedRHS; + + LLVMContext &Ctx = Cmp.getContext(); + // Handle the trivial case first in which we don't even need to think about + // null or non-null. + if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) { + Constant *NewV = + ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual()); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, + getAnchorScope()); + return true; + } + + // From now on we only handle equalities (==, !=). + ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp); + if (!ICmp || !ICmp->isEquality()) + return false; + + bool LHSIsNull = isa<ConstantPointerNull>(LHS); + bool RHSIsNull = isa<ConstantPointerNull>(RHS); + if (!LHSIsNull && !RHSIsNull) + return false; + + // Left is the nullptr ==/!= non-nullptr case. 
We'll use AANonNull on the + // non-nullptr operand and if we assume it's non-null we can conclude the + // result of the comparison. + assert((LHSIsNull || RHSIsNull) && + "Expected nullptr versus non-nullptr comparison at this point"); + + // The index is the operand that we assume is not null. + unsigned PtrIdx = LHSIsNull; + auto &PtrNonNullAA = A.getAAFor<AANonNull>( + *this, IRPosition::value(*ICmp->getOperand(PtrIdx)), + DepClassTy::REQUIRED); + if (!PtrNonNullAA.isAssumedNonNull()) + return false; + + // The new value depends on the predicate, true for != and false for ==. + Constant *NewV = ConstantInt::get(Type::getInt1Ty(Ctx), + ICmp->getPredicate() == CmpInst::ICMP_NE); + addValue(A, getState(), *NewV, /* CtxI */ nullptr, II.S, getAnchorScope()); + return true; + } + + bool handleSelectInst(Attributor &A, SelectInst &SI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + const Instruction *CtxI = II.I.getCtxI(); + bool UsedAssumedInformation = false; + + Optional<Constant *> C = + A.getAssumedConstant(*SI.getCondition(), *this, UsedAssumedInformation); + bool NoValueYet = !C.has_value(); + if (NoValueYet || isa_and_nonnull<UndefValue>(*C)) + return true; + if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) { + if (CI->isZero()) + Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + else + Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); + } else { + // We could not simplify the condition, assume both values. 
+ Worklist.push_back({{*SI.getTrueValue(), CtxI}, II.S}); + Worklist.push_back({{*SI.getFalseValue(), CtxI}, II.S}); + } + return true; + } + + bool handleLoadInst(Attributor &A, LoadInst &LI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + SmallSetVector<Value *, 4> PotentialCopies; + SmallSetVector<Instruction *, 4> PotentialValueOrigins; + bool UsedAssumedInformation = false; + if (!AA::getPotentiallyLoadedValues(A, LI, PotentialCopies, + PotentialValueOrigins, *this, + UsedAssumedInformation, + /* OnlyExact */ true)) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Failed to get potentially " + "loaded values for load instruction " + << LI << "\n"); + return false; + } + + // Do not simplify loads that are only used in llvm.assume if we cannot also + // remove all stores that may feed into the load. The reason is that the + // assume is probably worth something as long as the stores are around. + InformationCache &InfoCache = A.getInfoCache(); + if (InfoCache.isOnlyUsedByAssume(LI)) { + if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { + if (!I) + return true; + if (auto *SI = dyn_cast<StoreInst>(I)) + return A.isAssumedDead(SI->getOperandUse(0), this, + /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + return A.isAssumedDead(*I, this, /* LivenessAA */ nullptr, + UsedAssumedInformation, + /* CheckBBLivenessOnly */ false); + })) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Load is onl used by assumes " + "and we cannot delete all the stores: " + << LI << "\n"); + return false; + } + } + + // Values have to be dynamically unique or we loose the fact that a + // single llvm::Value might represent two runtime values (e.g., + // stack locations in different recursive calls). 
+ const Instruction *CtxI = II.I.getCtxI(); + bool ScopeIsLocal = (II.S & AA::Intraprocedural); + bool AllLocal = ScopeIsLocal; + bool DynamicallyUnique = llvm::all_of(PotentialCopies, [&](Value *PC) { + AllLocal &= AA::isValidInScope(*PC, getAnchorScope()); + return AA::isDynamicallyUnique(A, *this, *PC); + }); + if (!DynamicallyUnique) { + LLVM_DEBUG(dbgs() << "[AAPotentialValues] Not all potentially loaded " + "values are dynamically unique: " + << LI << "\n"); + return false; + } + + for (auto *PotentialCopy : PotentialCopies) { + if (AllLocal) { + Worklist.push_back({{*PotentialCopy, CtxI}, II.S}); + } else { + Worklist.push_back({{*PotentialCopy, CtxI}, AA::Interprocedural}); + } + } + if (!AllLocal && ScopeIsLocal) + addValue(A, getState(), LI, CtxI, AA::Intraprocedural, getAnchorScope()); + return true; + } + + bool handlePHINode( + Attributor &A, PHINode &PHI, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist, + SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) { + auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & { + LivenessInfo &LI = LivenessAAs[&F]; + if (!LI.LivenessAA) + LI.LivenessAA = &A.getAAFor<AAIsDead>(*this, IRPosition::function(F), + DepClassTy::NONE); + return LI; + }; + + LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction()); + for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) { + BasicBlock *IncomingBB = PHI.getIncomingBlock(u); + if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) { + LI.AnyDead = true; + continue; + } + Worklist.push_back( + {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S}); + } + return true; + } + + /// Use the generic, non-optimistic InstSimplfy functionality if we managed to + /// simplify any operand of the instruction \p I. Return true if successful, + /// in that case Worklist will be updated. 
+ bool handleGenericInst(Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist) { + bool SomeSimplified = false; + bool UsedAssumedInformation = false; + + SmallVector<Value *, 8> NewOps(I.getNumOperands()); + int Idx = 0; + for (Value *Op : I.operands()) { + const auto &SimplifiedOp = A.getAssumedSimplified( + IRPosition::value(*Op, getCallBaseContext()), *this, + UsedAssumedInformation, AA::Intraprocedural); + // If we are not sure about any operand we are not sure about the entire + // instruction, we'll wait. + if (!SimplifiedOp.has_value()) + return true; + + if (SimplifiedOp.value()) + NewOps[Idx] = SimplifiedOp.value(); + else + NewOps[Idx] = Op; + + SomeSimplified |= (NewOps[Idx] != Op); + ++Idx; + } + + // We won't bother with the InstSimplify interface if we didn't simplify any + // operand ourselves. + if (!SomeSimplified) + return false; + + InformationCache &InfoCache = A.getInfoCache(); + Function *F = I.getFunction(); + const auto *DT = + InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F); + const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F); + auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F); + OptimizationRemarkEmitter *ORE = nullptr; + + const DataLayout &DL = I.getModule()->getDataLayout(); + SimplifyQuery Q(DL, TLI, DT, AC, &I); + Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q, ORE); + if (!NewV || NewV == &I) + return false; + + LLVM_DEBUG(dbgs() << "Generic inst " << I << " assumed simplified to " + << *NewV << "\n"); + Worklist.push_back({{*NewV, II.I.getCtxI()}, II.S}); + return true; + } + + bool simplifyInstruction( + Attributor &A, Instruction &I, ItemInfo II, + SmallVectorImpl<ItemInfo> &Worklist, + SmallMapVector<const Function *, LivenessInfo, 4> &LivenessAAs) { + if (auto *CI = dyn_cast<CmpInst>(&I)) + if (handleCmp(A, *CI, II, Worklist)) + return true; + + switch (I.getOpcode()) { + case Instruction::Select: + return 
handleSelectInst(A, cast<SelectInst>(I), II, Worklist); + case Instruction::PHI: + return handlePHINode(A, cast<PHINode>(I), II, Worklist, LivenessAAs); + case Instruction::Load: + return handleLoadInst(A, cast<LoadInst>(I), II, Worklist); + default: + return handleGenericInst(A, I, II, Worklist); + }; + return false; + } + + void genericValueTraversal(Attributor &A) { + SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; + + Value *InitialV = &getAssociatedValue(); + SmallSet<AA::ValueAndContext, 16> Visited; + SmallVector<ItemInfo, 16> Worklist; + Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope}); + + int Iteration = 0; + do { + ItemInfo II = Worklist.pop_back_val(); + Value *V = II.I.getValue(); + assert(V); + const Instruction *CtxI = II.I.getCtxI(); + AA::ValueScope S = II.S; + + // Check if we should process the current value. To prevent endless + // recursion keep a record of the values we followed! + if (!Visited.insert(II.I).second) + continue; + + // Make sure we limit the compile time for complex expressions. + if (Iteration++ >= MaxPotentialValuesIterations) { + LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: " + << Iteration << "!\n"); + addValue(A, getState(), *V, CtxI, S, getAnchorScope()); + continue; + } + + // Explicitly look through calls with a "returned" attribute if we do + // not have a pointer as stripPointerCasts only works on them. 
+ Value *NewV = nullptr; + if (V->getType()->isPointerTy()) { + NewV = AA::getWithType(*V->stripPointerCasts(), *V->getType()); + } else { + auto *CB = dyn_cast<CallBase>(V); + if (CB && CB->getCalledFunction()) { + for (Argument &Arg : CB->getCalledFunction()->args()) + if (Arg.hasReturnedAttr()) { + NewV = CB->getArgOperand(Arg.getArgNo()); + break; + } + } + } + if (NewV && NewV != V) { + Worklist.push_back({{*NewV, CtxI}, S}); + continue; + } + + if (auto *I = dyn_cast<Instruction>(V)) { + if (simplifyInstruction(A, *I, II, Worklist, LivenessAAs)) + continue; + } + + if (V != InitialV || isa<Argument>(V)) + if (recurseForValue(A, IRPosition::value(*V), II.S)) + continue; + + // If we haven't stripped anything we give up. + if (V == InitialV && CtxI == getCtxI()) { + indicatePessimisticFixpoint(); + return; + } + + addValue(A, getState(), *V, CtxI, S, getAnchorScope()); + } while (!Worklist.empty()); + + // If we actually used liveness information, we have to record a + // dependence. + for (auto &It : LivenessAAs) + if (It.second.AnyDead) + A.recordDependence(*It.second.LivenessAA, *this, DepClassTy::OPTIONAL); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(potential_values) + } +}; + +struct AAPotentialValuesArgument final : AAPotentialValuesImpl { + using Base = AAPotentialValuesImpl; + AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + auto &Arg = cast<Argument>(getAssociatedValue()); + if (Arg.hasPointeeInMemoryValueAttr()) + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + unsigned CSArgNo = getCallSiteArgNo(); + + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + auto CallSitePred = [&](AbstractCallSite ACS) { + const auto CSArgIRP = IRPosition::callsite_argument(ACS, CSArgNo); + if (CSArgIRP.getPositionKind() == IRP_INVALID) + return false; + + if (!A.getAssumedSimplifiedValues(CSArgIRP, this, Values, + AA::Interprocedural, + UsedAssumedInformation)) + return false; + + return isValidState(); + }; + + if (!A.checkForAllCallSites(CallSitePred, *this, + /* RequireAllCallSites */ true, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + Function *Fn = getAssociatedFunction(); + bool AnyNonLocal = false; + for (auto &It : Values) { + if (isa<Constant>(It.getValue())) { + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope, + getAnchorScope()); + continue; + } + if (!AA::isDynamicallyUnique(A, *this, *It.getValue())) + return indicatePessimisticFixpoint(); + + if (auto *Arg = dyn_cast<Argument>(It.getValue())) + if (Arg->getParent() == Fn) { + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::AnyScope, + getAnchorScope()); + continue; + } + addValue(A, getState(), *It.getValue(), It.getCtxI(), AA::Interprocedural, + getAnchorScope()); + AnyNonLocal = true; + } + if (undefIsContained()) + unionAssumedWithUndef(); + if (AnyNonLocal) + giveUpOnIntraprocedural(A); + + return (AssumedBefore == getAssumed()) ? 
ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(potential_values) + } +}; + +struct AAPotentialValuesReturned + : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> { + using Base = + AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>; + AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) + : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(..). + void initialize(Attributor &A) override { + if (A.hasSimplificationCallback(getIRPosition())) + indicatePessimisticFixpoint(); + else + AAPotentialValues::initialize(A); + } + + ChangeStatus manifest(Attributor &A) override { + // We queried AAValueSimplify for the returned values so they will be + // replaced if a simplified form was found. Nothing to do here. + return ChangeStatus::UNCHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesFunction : AAPotentialValuesImpl { + AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " + "not be called"); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FN_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSite : AAPotentialValuesFunction { + AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFunction(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CS_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteReturned : AAPotentialValuesImpl { + AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto AssumedBefore = getAssumed(); + + Function *Callee = getAssociatedFunction(); + if (!Callee) + return indicatePessimisticFixpoint(); + + bool UsedAssumedInformation = false; + auto *CB = cast<CallBase>(getCtxI()); + if (CB->isMustTailCall() && + !A.isAssumedDead(IRPosition::inst(*CB), this, nullptr, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + SmallVector<AA::ValueAndContext> Values; + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Intraprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + + Function *Caller = CB->getCaller(); + + bool AnyNonLocal = false; + for (auto &It : Values) { + Value *V = It.getValue(); + Optional<Value *> CallerV = A.translateArgumentToCallSiteContent( + V, *CB, *this, UsedAssumedInformation); + if (!CallerV.has_value()) { + // Nothing to do as long as no value was determined. + continue; + } + V = CallerV.value() ? 
CallerV.value() : V; + if (AA::isDynamicallyUnique(A, *this, *V) && + AA::isValidInScope(*V, Caller)) { + if (CallerV.value()) { + SmallVector<AA::ValueAndContext> ArgValues; + IRPosition IRP = IRPosition::value(*V); + if (auto *Arg = dyn_cast<Argument>(V)) + if (Arg->getParent() == CB->getCalledFunction()) + IRP = IRPosition::callsite_argument(*CB, Arg->getArgNo()); + if (recurseForValue(A, IRP, AA::AnyScope)) + continue; + } + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + break; + } + } + if (AnyNonLocal) { + Values.clear(); + if (!A.getAssumedSimplifiedValues(IRPosition::returned(*Callee), this, + Values, AA::Interprocedural, + UsedAssumedInformation)) + return indicatePessimisticFixpoint(); + AnyNonLocal = false; + getState() = PotentialLLVMValuesState::getBestState(); + for (auto &It : Values) { + Value *V = It.getValue(); + if (!AA::isDynamicallyUnique(A, *this, *V)) + return indicatePessimisticFixpoint(); + if (AA::isValidInScope(*V, Caller)) { + addValue(A, getState(), *V, CB, AA::AnyScope, getAnchorScope()); + } else { + AnyNonLocal = true; + addValue(A, getState(), *V, CB, AA::Interprocedural, + getAnchorScope()); + } + } + if (AnyNonLocal) + giveUpOnIntraprocedural(A); + } + return (AssumedBefore == getAssumed()) ? 
ChangeStatus::UNCHANGED + : ChangeStatus::CHANGED; + } + + ChangeStatus indicatePessimisticFixpoint() override { + return AAPotentialValues::indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(potential_values) + } +}; + +struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { + AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AAPotentialValuesFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(potential_values) + } +}; +} // namespace + /// ---------------------- Assumption Propagation ------------------------------ namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { @@ -10323,6 +10749,7 @@ const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; const char AAPotentialConstantValues::ID = 0; +const char AAPotentialValues::ID = 0; const char AANoUndef::ID = 0; const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; @@ -10441,6 +10868,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInstanceInfo) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo) diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp index 56e2df14ff38..360ec24a0509 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1147,6 +1147,14 @@ void 
llvm::thinLTOInternalizeModule(Module &TheModule, // Declare a callback for the internalize pass that will ask for every // candidate GlobalValue if it can be internalized or not. auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { + // It may be the case that GV is on a chain of an ifunc, its alias and + // subsequent aliases. In this case, the summary for the value is not + // available. + if (isa<GlobalIFunc>(&GV) || + (isa<GlobalAlias>(&GV) && + isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject()))) + return true; + // Lookup the linkage recorded in the summaries during global analysis. auto GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) { @@ -1277,7 +1285,7 @@ Expected<bool> FunctionImporter::importFunctions( } } for (GlobalAlias &GA : SrcModule->aliases()) { - if (!GA.hasName()) + if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject())) continue; auto GUID = GA.getGUID(); auto Import = ImportGUIDs.count(GUID); @@ -1413,29 +1421,6 @@ static bool doImportingForModule(Module &M) { return *Result; } -namespace { - -/// Pass that performs cross-module function import provided a summary file. 
-class FunctionImportLegacyPass : public ModulePass { -public: - /// Pass identification, replacement for typeid - static char ID; - - explicit FunctionImportLegacyPass() : ModulePass(ID) {} - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Function Importing"; } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - return doImportingForModule(M); - } -}; - -} // end anonymous namespace - PreservedAnalyses FunctionImportPass::run(Module &M, ModuleAnalysisManager &AM) { if (!doImportingForModule(M)) @@ -1443,15 +1428,3 @@ PreservedAnalyses FunctionImportPass::run(Module &M, return PreservedAnalyses::none(); } - -char FunctionImportLegacyPass::ID = 0; -INITIALIZE_PASS(FunctionImportLegacyPass, "function-import", - "Summary Based Function Import", false, false) - -namespace llvm { - -Pass *createFunctionImportPass() { - return new FunctionImportLegacyPass(); -} - -} // end namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1ad6e2b2a1d2..ec26db8bfc0b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1040,7 +1040,7 @@ static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, CallInst *CI, const DataLayout &DL, TargetLibraryInfo *TLI) { - if (!isAllocRemovable(CI, TLI)) + if (!isRemovableAlloc(CI, TLI)) // Must be able to remove the call when we get done.. 
return false; diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp index ec2b80012ed6..dfd434e61d5b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/IPO.cpp @@ -44,7 +44,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeLoopExtractorLegacyPassPass(Registry); initializeBlockExtractorLegacyPassPass(Registry); initializeSingleLoopExtractorPass(Registry); - initializeLowerTypeTestsPass(Registry); initializeMergeFunctionsLegacyPassPass(Registry); initializePartialInlinerLegacyPassPass(Registry); initializeAttributorLegacyPassPass(Registry); @@ -60,9 +59,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyLegacyPassPass(Registry); - initializeSampleProfileLoaderLegacyPassPass(Registry); - initializeFunctionImportLegacyPassPass(Registry); - initializeWholeProgramDevirtPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp index 5aa5b905f06c..85b1a8303d33 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Internalize.cpp @@ -28,6 +28,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -40,13 +41,13 @@ STATISTIC(NumAliases, "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals, "Number of global vars internalized"); -// APIFile - A file which contains a list of symbols that should not be marked -// external. 
+// APIFile - A file which contains a list of symbol glob patterns that should +// not be marked external. static cl::opt<std::string> APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve")); -// APIList - A list of symbols that should not be marked internal. +// APIList - A list of symbol glob patterns that should not be marked internal. static cl::list<std::string> APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); @@ -59,29 +60,44 @@ public: PreserveAPIList() { if (!APIFile.empty()) LoadFile(APIFile); - ExternalNames.insert(APIList.begin(), APIList.end()); + for (StringRef Pattern : APIList) + addGlob(Pattern); } bool operator()(const GlobalValue &GV) { - return ExternalNames.count(GV.getName()); + return llvm::any_of( + ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); }); } private: // Contains the set of symbols loaded from file - StringSet<> ExternalNames; + SmallVector<GlobPattern> ExternalNames; + + void addGlob(StringRef Pattern) { + auto GlobOrErr = GlobPattern::create(Pattern); + if (!GlobOrErr) { + errs() << "WARNING: when loading pattern: '" + << toString(GlobOrErr.takeError()) << "' ignoring"; + return; + } + ExternalNames.emplace_back(std::move(*GlobOrErr)); + } void LoadFile(StringRef Filename) { // Load the APIFile... - ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = + ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(Filename); - if (!Buf) { + if (!BufOrErr) { errs() << "WARNING: Internalize couldn't load file '" << Filename << "'! 
Continuing as if it's empty.\n"; return; // Just continue as if the file were empty } - for (line_iterator I(*Buf->get(), true), E; I != E; ++I) - ExternalNames.insert(*I); + Buf = std::move(*BufOrErr); + for (line_iterator I(*Buf, true), E; I != E; ++I) + addGlob(*I); } + + std::shared_ptr<MemoryBuffer> Buf; }; } // end anonymous namespace diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index d5f1d291f41f..6bf25df101fa 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -528,50 +528,8 @@ public: // arguments. For testing purposes only. static bool runForTesting(Module &M); }; - -struct LowerTypeTests : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary; - const ModuleSummaryIndex *ImportSummary; - bool DropTypeTests; - - LowerTypeTests() : ModulePass(ID), UseCommandLine(true) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - LowerTypeTests(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, bool DropTypeTests) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary), - DropTypeTests(DropTypeTests || ClDropTypeTests) { - initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if (UseCommandLine) - return LowerTypeTestsModule::runForTesting(M); - return LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests) - .lower(); - } -}; - } // end anonymous namespace -char LowerTypeTests::ID = 0; - -INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, - false) - -ModulePass * -llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary, - bool DropTypeTests) { - return new LowerTypeTests(ExportSummary, 
ImportSummary, DropTypeTests); -} - /// Build a bit set for TypeId using the object layouts in /// GlobalLayout. BitSetInfo LowerTypeTestsModule::buildBitSet( diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8e0ca8c6c997..0b42fc151991 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4808,7 +4808,7 @@ void OpenMPOpt::registerAAs(bool IsModulePass) { if (auto *LI = dyn_cast<LoadInst>(&I)) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, - UsedAssumedInformation); + UsedAssumedInformation, AA::Interprocedural); } else if (auto *SI = dyn_cast<StoreInst>(&I)) { A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 8eef82675e86..f1b6f2bb7de4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -91,14 +91,6 @@ cl::opt<bool> EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading."), cl::init(false), cl::Hidden); -static cl::opt<bool> - EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable preparation for ThinLTO.")); - -static cl::opt<bool> - EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, - cl::desc("Enable performing ThinLTO.")); - cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass")); @@ -192,15 +184,6 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; - PrepareForLTO = false; - EnablePGOInstrGen = false; - EnablePGOCSInstrGen = false; - EnablePGOCSInstrUse = false; - PGOInstrGen = ""; - 
PGOInstrUse = ""; - PGOSampleUse = ""; - PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; CallGraphProfile = true; } @@ -390,7 +373,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); @@ -470,10 +453,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); - - if (EnableCHR && OptLevel >= 3 && - (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) - MPM.add(createControlHeightReductionLegacyPass()); } /// FIXME: Should LTO cause any differences to this set of passes? @@ -598,15 +577,6 @@ void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { MPM.add(createAnnotation2MetadataLegacyPass()); - if (!PGOSampleUse.empty()) { - MPM.add(createPruneEHPass()); - // In ThinLTO mode, when flattened profile is used, all the available - // profile information will be annotated in PreLink phase so there is - // no need to load the profile again in PostLink. - if (!(FlattenedProfileUsed && PerformThinLTO)) - MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); - } - // Allow forcing function attributes as a debugging and tuning aid. 
MPM.add(createForceFunctionAttrsLegacyPass()); @@ -628,26 +598,8 @@ void PassManagerBuilder::populateModulePassManager( else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - // Drop available_externally and unreferenced globals. This is necessary - // with ThinLTO in order to avoid leaving undefined references to dead - // globals in the object file. - MPM.add(createEliminateAvailableExternallyPass()); - MPM.add(createGlobalDCEPass()); - } - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); - if (PrepareForLTO || PrepareForThinLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - // This has to be done after we add the extensions to the pass manager - // as there could be passes (e.g. Adddress sanitizer) which introduce - // new unnamed globals. - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); return; } @@ -658,25 +610,6 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); - // For ThinLTO there are two passes of indirect call promotion. The - // first is during the compile phase when PerformThinLTO=false and - // intra-module indirect call targets are promoted. The second is during - // the ThinLTO backend when PerformThinLTO=true, when we promote imported - // inter-module indirect calls. For that we perform indirect call promotion - // earlier in the pass pipeline, here before globalopt. Otherwise imported - // available_externally functions look unreferenced and are removed. - if (PerformThinLTO) { - MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); - } - - // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops - // as it will change the CFG too much to make the 2nd profile annotation - // in backend more difficult. 
- bool PrepareForThinLTOUsingPGOSampleProfile = - PrepareForThinLTO && !PGOSampleUse.empty(); - if (PrepareForThinLTOUsingPGOSampleProfile) - DisableUnrollLoops = true; - // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); @@ -744,7 +677,7 @@ void PassManagerBuilder::populateModulePassManager( if (RunPartialInlining) MPM.add(createPartialInliningPass()); - if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) + if (OptLevel > 1) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they @@ -756,9 +689,6 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - if (EnableOrderFileInstrumentation) - MPM.add(createInstrOrderFilePass()); - MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, @@ -772,24 +702,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createGlobalDCEPass()); } - // If we are planning to perform ThinLTO later, let's not bloat the code with - // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes - // during ThinLTO and perform the rest of the optimizations afterward. - if (PrepareForThinLTO) { - // Ensure we perform any last passes, but do so before renaming anonymous - // globals in case the passes add any. - addExtensionsToPM(EP_OptimizerLast, MPM); - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to export them in the summary. - MPM.add(createNameAnonGlobalPass()); - return; - } - - if (PerformThinLTO) - // Optimize globals now when performing ThinLTO, this enables more - // optimizations later. 
- MPM.add(createGlobalOptimizerPass()); - // Scheduling LoopVersioningLICM when inlining is over, because after that // we may see more accurate aliasing. Reason to run this late is that too // early versioning may prevent further inlining due to increase of code @@ -834,7 +746,7 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. - MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -856,7 +768,7 @@ void PassManagerBuilder::populateModulePassManager( // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) + if (EnableHotColdSplit) MPM.add(createHotColdSplittingPass()); if (EnableIROutliner) @@ -865,10 +777,6 @@ void PassManagerBuilder::populateModulePassManager( if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - // Add Module flag "CG Profile" based on Branch Frequency Information. - if (CallGraphProfile) - MPM.add(createCGProfileLegacyPass()); - // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. 
As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM @@ -889,12 +797,6 @@ void PassManagerBuilder::populateModulePassManager( addExtensionsToPM(EP_OptimizerLast, MPM); - if (PrepareForLTO) { - MPM.add(createCanonicalizeAliasesPass()); - // Rename anon globals to be able to handle them in the summary - MPM.add(createNameAnonGlobalPass()); - } - MPM.add(createAnnotationRemarksLegacyPass()); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp index 55fee213cd5f..f76b886e810a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -546,53 +546,6 @@ private: return AnnotatedPassName.c_str(); } }; - -class SampleProfileLoaderLegacyPass : public ModulePass { -public: - // Class identification, replacement for typeinfo - static char ID; - - SampleProfileLoaderLegacyPass( - StringRef Name = SampleProfileFile, - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : ModulePass(ID), SampleLoader( - Name, SampleProfileRemappingFile, LTOPhase, - [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }, - [&](Function &F) -> TargetTransformInfo & { - return TTIWP->getTTI(F); - }, - [&](Function &F) -> TargetLibraryInfo & { - return TLIWP->getTLI(F); - }) { - initializeSampleProfileLoaderLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - void dump() { SampleLoader.dump(); } - - bool doInitialization(Module &M) override { - return SampleLoader.doInitialization(M); - } - - StringRef getPassName() const override { return "Sample profile pass"; } - bool runOnModule(Module &M) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - 
AU.addRequired<ProfileSummaryInfoWrapperPass>(); - } - -private: - SampleProfileLoader SampleLoader; - AssumptionCacheTracker *ACT = nullptr; - TargetTransformInfoWrapperPass *TTIWP = nullptr; - TargetLibraryInfoWrapperPass *TLIWP = nullptr; -}; - } // end anonymous namespace ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) { @@ -734,8 +687,8 @@ SampleProfileLoader::findIndirectCallFunctionSamples( auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) { assert(L && R && "Expect non-null FunctionSamples"); - if (L->getEntrySamples() != R->getEntrySamples()) - return L->getEntrySamples() > R->getEntrySamples(); + if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate()) + return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate(); return FunctionSamples::getGUID(L->getName()) < FunctionSamples::getGUID(R->getName()); }; @@ -750,7 +703,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( // as that already includes both inlined callee and non-inlined ones.. 
Sum = 0; for (const auto *const FS : CalleeSamples) { - Sum += FS->getEntrySamples(); + Sum += FS->getHeadSamplesEstimate(); R.push_back(FS); } llvm::sort(R, FSCompare); @@ -771,7 +724,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples( if (M->empty()) return R; for (const auto &NameFS : *M) { - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); R.push_back(&NameFS.second); } llvm::sort(R, FSCompare); @@ -1090,7 +1043,7 @@ void SampleProfileLoader::findExternalInlineCandidate( bool PreInline = UsePreInlinerDecision && CalleeSample->getContext().hasAttribute(ContextShouldBeInlined); - if (!PreInline && CalleeSample->getEntrySamples() < Threshold) + if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold) continue; StringRef Name = CalleeSample->getFuncName(); @@ -1171,7 +1124,8 @@ bool SampleProfileLoader::inlineHotFunctions( assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) && "GUIDToFuncNameMap has to be populated"); AllCandidates.push_back(CB); - if (FS->getEntrySamples() > 0 || FunctionSamples::ProfileIsCS) + if (FS->getHeadSamplesEstimate() > 0 || + FunctionSamples::ProfileIsCS) LocalNotInlinedCallSites.try_emplace(CB, FS); if (callsiteIsHot(FS, PSI, ProfAccForSymsInList)) Hot = true; @@ -1211,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions( if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList)) continue; - Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0}; if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; @@ -1325,7 +1279,7 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate, Factor = Probe->Factor; uint64_t CallsiteCount = - CalleeSamples ? CalleeSamples->getEntrySamples() * Factor : 0; + CalleeSamples ? 
CalleeSamples->getHeadSamplesEstimate() * Factor : 0; *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1481,7 +1435,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority( continue; } uint64_t EntryCountDistributed = - FS->getEntrySamples() * Candidate.CallsiteDistribution; + FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1568,7 +1522,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( ++NumCSNotInlined; const FunctionSamples *FS = Pair.getSecond(); - if (FS->getTotalSamples() == 0 && FS->getEntrySamples() == 0) { + if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) { continue; } @@ -1586,7 +1540,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( // Use entry samples as head samples during the merge, as inlinees // don't have head samples. const_cast<FunctionSamples *>(FS)->addHeadSamples( - FS->getEntrySamples()); + FS->getHeadSamplesEstimate()); // Note that we have to do the merge right after processing function. 
// This allows OutlineFS's profile to be used for annotation during @@ -1599,7 +1553,7 @@ void SampleProfileLoader::promoteMergeNotInlinedContextSamples( } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); - pair.first->second.entryCount += FS->getEntrySamples(); + pair.first->second.entryCount += FS->getHeadSamplesEstimate(); } } } @@ -1663,7 +1617,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) { for (const auto &NameFS : *M) - Sum += NameFS.second.getEntrySamples(); + Sum += NameFS.second.getHeadSamplesEstimate(); } } if (Sum) @@ -1825,17 +1779,6 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { return Changed; } -char SampleProfileLoaderLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", - "Sample Profile loader", false, false) - std::unique_ptr<ProfiledCallGraph> SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG; @@ -2073,14 +2016,6 @@ bool SampleProfileLoader::doInitialization(Module &M, return true; } -ModulePass *llvm::createSampleProfileLoaderPass() { - return new SampleProfileLoaderLegacyPass(); -} - -ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { - return new SampleProfileLoaderLegacyPass(Name); -} - bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); @@ -2141,15 +2076,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager 
*AM, return retval; } -bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { - ACT = &getAnalysis<AssumptionCacheTracker>(); - TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); - TLIWP = &getAnalysis<TargetLibraryInfoWrapperPass>(); - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); -} - bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n"); DILocation2SampleMap.clear(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 898a213d0849..ad00c116ce0a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -747,78 +747,8 @@ struct DevirtIndex { void run(); }; - -struct WholeProgramDevirt : public ModulePass { - static char ID; - - bool UseCommandLine = false; - - ModuleSummaryIndex *ExportSummary = nullptr; - const ModuleSummaryIndex *ImportSummary = nullptr; - - WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - WholeProgramDevirt(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) - : ModulePass(ID), ExportSummary(ExportSummary), - ImportSummary(ImportSummary) { - initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - if (skipModule(M)) - return false; - - // In the new pass manager, we can request the optimization - // remark emitter pass on a per-function-basis, which the - // OREGetter will do for us. - // In the old pass manager, this is harder, so we just build - // an optimization remark emitter on the fly, when we need it. 
- std::unique_ptr<OptimizationRemarkEmitter> ORE; - auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { - ORE = std::make_unique<OptimizationRemarkEmitter>(F); - return *ORE; - }; - - auto LookupDomTree = [this](Function &F) -> DominatorTree & { - return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - - if (UseCommandLine) - return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter, - LookupDomTree); - - return DevirtModule(M, LegacyAARGetter(*this), OREGetter, LookupDomTree, - ExportSummary, ImportSummary) - .run(); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AssumptionCacheTracker>(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - } -}; - } // end anonymous namespace -INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt", - "Whole program devirtualization", false, false) -char WholeProgramDevirt::ID = 0; - -ModulePass * -llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { - return new WholeProgramDevirt(ExportSummary, ImportSummary); -} - PreservedAnalyses WholeProgramDevirtPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 535a7736454c..4a459ec6c550 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1966,12 +1966,14 @@ 
Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2)); } - // If there's no chance any bit will need to borrow from an adjacent bit: - // sub C, X --> xor X, C const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && - (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C)) - return BinaryOperator::CreateXor(Op1, Op0); + if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. + KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnes()) + return BinaryOperator::CreateXor(Op1, Op0); + } { Value *Y; diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a8f2cd79830a..8253c575bc37 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2664,8 +2664,8 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, // Inverted form (example): // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0) bool TrueIfSignedL, TrueIfSignedR; - if (InstCombiner::isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && - InstCombiner::isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && + if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) && + isSignBitCheck(PredR, *RHSC, TrueIfSignedR) && (RHS->hasOneUse() || LHS->hasOneUse())) { Value *X, *Y; if (IsAnd) { @@ -3202,25 +3202,38 @@ Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, // TODO: This can be generalized to compares of non-signbits using // decomposeBitTestICmp(). It could be enhanced more by using (something like) // foldLogOpOfMaskedICmps(). 
- if ((LHS->hasOneUse() || RHS->hasOneUse()) && + const APInt *LC, *RC; + if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) && LHS0->getType() == RHS0->getType() && - LHS0->getType()->isIntOrIntVectorTy()) { + LHS0->getType()->isIntOrIntVectorTy() && + (LHS->hasOneUse() || RHS->hasOneUse())) { + // Convert xor of signbit tests to signbit test of xor'd values: // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0 // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero()))) - return Builder.CreateIsNeg(Builder.CreateXor(LHS0, RHS0)); - // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1 // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1 - if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_AllOnes()) && - PredR == CmpInst::ICMP_SLT && match(RHS1, m_Zero())) || - (PredL == CmpInst::ICMP_SLT && match(LHS1, m_Zero()) && - PredR == CmpInst::ICMP_SGT && match(RHS1, m_AllOnes()))) - return Builder.CreateIsNotNeg(Builder.CreateXor(LHS0, RHS0)); - + bool TrueIfSignedL, TrueIfSignedR; + if (isSignBitCheck(PredL, *LC, TrueIfSignedL) && + isSignBitCheck(PredR, *RC, TrueIfSignedR)) { + Value *XorLR = Builder.CreateXor(LHS0, RHS0); + return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR) : + Builder.CreateIsNotNeg(XorLR); + } + + // (X > C) ^ (X < C + 2) --> X != C + 1 + // (X < C + 2) ^ (X > C) --> X != C + 1 + // Considering the correctness of this pattern, we should avoid that C is + // non-negative and C + 2 is negative, although it will be matched by other + // patterns. 
+ const APInt *C1, *C2; + if ((PredL == CmpInst::ICMP_SGT && match(LHS1, m_APInt(C1)) && + PredR == CmpInst::ICMP_SLT && match(RHS1, m_APInt(C2))) || + (PredL == CmpInst::ICMP_SLT && match(LHS1, m_APInt(C2)) && + PredR == CmpInst::ICMP_SGT && match(RHS1, m_APInt(C1)))) + if (LHS0 == RHS0 && *C1 + 2 == *C2 && + (C1->isNegative() || C2->isNonNegative())) + return Builder.CreateICmpNE(LHS0, + ConstantInt::get(LHS0->getType(), *C1 + 1)); } // Instead of trying to imitate the folds for and/or, decompose this 'xor' diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index edfdf70c2b97..bc01d2ef7fe2 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1140,8 +1140,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); - if (isFreeCall(&CI, &TLI)) - return visitFree(CI); + if (Value *FreedOp = getFreedOperand(&CI, &TLI)) + return visitFree(CI, FreedOp); // If the caller function (i.e. us, the function that contains this CallInst) // is nounwind, mark the call as nounwind, even if the callee isn't. @@ -1539,8 +1539,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Type *Ty = II->getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); Constant *ShAmtC; - if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC)) && - !ShAmtC->containsConstantExpression()) { + if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) { // Canonicalize a shift amount constant operand to modulo the bit-width. Constant *WidthC = ConstantInt::get(Ty, BitWidth); Constant *ModuloC = @@ -2885,21 +2884,21 @@ bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, // of the respective allocator declaration with generic attributes. 
bool Changed = false; - if (isAllocationFn(&Call, TLI)) { - uint64_t Size; - ObjectSizeOpts Opts; - if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) { - // TODO: We really should just emit deref_or_null here and then - // let the generic inference code combine that with nonnull. - if (Call.hasRetAttr(Attribute::NonNull)) { - Changed = !Call.hasRetAttr(Attribute::Dereferenceable); - Call.addRetAttr( - Attribute::getWithDereferenceableBytes(Call.getContext(), Size)); - } else { - Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); - Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( - Call.getContext(), Size)); - } + if (!Call.getType()->isPointerTy()) + return Changed; + + Optional<APInt> Size = getAllocSize(&Call, TLI); + if (Size && *Size != 0) { + // TODO: We really should just emit deref_or_null here and then + // let the generic inference code combine that with nonnull. + if (Call.hasRetAttr(Attribute::NonNull)) { + Changed = !Call.hasRetAttr(Attribute::Dereferenceable); + Call.addRetAttr(Attribute::getWithDereferenceableBytes( + Call.getContext(), Size->getLimitedValue())); + } else { + Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull); + Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes( + Call.getContext(), Size->getLimitedValue())); } } @@ -3079,8 +3078,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); } - if (isAllocationFn(&Call, &TLI) && - isAllocRemovable(&cast<CallBase>(Call), &TLI)) + if (isRemovableAlloc(&Call, &TLI)) return visitAllocSite(Call); // Handle intrinsics which can be used in both call and invoke context. @@ -3242,15 +3240,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // the call because there is no place to put the cast instruction (without // breaking the critical edge). Bail out in this case. 
if (!Caller->use_empty()) { - if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) - for (User *U : II->users()) + BasicBlock *PhisNotSupportedBlock = nullptr; + if (auto *II = dyn_cast<InvokeInst>(Caller)) + PhisNotSupportedBlock = II->getNormalDest(); + if (auto *CB = dyn_cast<CallBrInst>(Caller)) + PhisNotSupportedBlock = CB->getDefaultDest(); + if (PhisNotSupportedBlock) + for (User *U : Caller->users()) if (PHINode *PN = dyn_cast<PHINode>(U)) - if (PN->getParent() == II->getNormalDest() || - PN->getParent() == II->getUnwindDest()) + if (PN->getParent() == PhisNotSupportedBlock) return false; - // FIXME: Be conservative for callbr to avoid a quadratic search. - if (isa<CallBrInst>(Caller)) - return false; } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9f6d36b85522..158d2e8289e0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2002,9 +2002,12 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, Constant::getNullValue(Mul->getType())); } + if (MulC->isZero() || !(Mul->hasNoSignedWrap() || Mul->hasNoUnsignedWrap())) + return nullptr; + // If the multiply does not wrap, try to divide the compare constant by the // multiplication factor. - if (Cmp.isEquality() && !MulC->isZero()) { + if (Cmp.isEquality()) { // (mul nsw X, MulC) == C --> X == C /s MulC if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) { Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC)); @@ -2017,7 +2020,40 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp, } } - return nullptr; + Constant *NewC = nullptr; + + // FIXME: Add assert that Pred is not equal to ICMP_SGE, ICMP_SLE, + // ICMP_UGE, ICMP_ULE. + + if (Mul->hasNoSignedWrap()) { + if (MulC->isNegative()) { + // MININT / -1 --> overflow. 
+ if (C.isMinSignedValue() && MulC->isAllOnes()) + return nullptr; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + if (Mul->hasNoUnsignedWrap()) { + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP)); + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) + NewC = ConstantInt::get( + Mul->getType(), + APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN)); + } + + return NewC ? new ICmpInst(Pred, Mul->getOperand(0), NewC) : nullptr; } /// Fold icmp (shl 1, Y), C. @@ -2235,13 +2271,22 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, bool IsAShr = Shr->getOpcode() == Instruction::AShr; const APInt *ShiftValC; - if (match(Shr->getOperand(0), m_APInt(ShiftValC))) { + if (match(X, m_APInt(ShiftValC))) { if (Cmp.isEquality()) return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftValC); + // (ShiftValC >> Y) >s -1 --> Y != 0 with ShiftValC < 0 + // (ShiftValC >> Y) <s 0 --> Y == 0 with ShiftValC < 0 + bool TrueIfSigned; + if (!IsAShr && ShiftValC->isNegative() && + isSignBitCheck(Pred, C, TrueIfSigned)) + return new ICmpInst(TrueIfSigned ? 
CmpInst::ICMP_EQ : CmpInst::ICMP_NE, + Shr->getOperand(1), + ConstantInt::getNullValue(X->getType())); + // If the shifted constant is a power-of-2, test the shift amount directly: - // (ShiftValC >> X) >u C --> X <u (LZ(C) - LZ(ShiftValC)) - // (ShiftValC >> X) <u C --> X >=u (LZ(C-1) - LZ(ShiftValC)) + // (ShiftValC >> Y) >u C --> Y <u (LZ(C) - LZ(ShiftValC)) + // (ShiftValC >> Y) <u C --> Y >=u (LZ(C-1) - LZ(ShiftValC)) if (!IsAShr && ShiftValC->isPowerOf2() && (Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_ULT)) { bool IsUGT = Pred == CmpInst::ICMP_UGT; @@ -2972,7 +3017,7 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) { const APInt *C; bool TrueIfSigned; if (match(Op1, m_APInt(C)) && Bitcast->hasOneUse() && - InstCombiner::isSignBitCheck(Pred, *C, TrueIfSigned)) { + isSignBitCheck(Pred, *C, TrueIfSigned)) { if (match(BCSrcOp, m_FPExt(m_Value(X))) || match(BCSrcOp, m_FPTrunc(m_Value(X)))) { // (bitcast (fpext/fptrunc X)) to iX) < 0 --> (bitcast X to iY) < 0 diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 271154bb3f5a..827b25533513 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -152,7 +152,7 @@ public: Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); Instruction *visitAllocSite(Instruction &FI); - Instruction *visitFree(CallInst &FI); + Instruction *visitFree(CallInst &FI, Value *FreedOp); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitAtomicRMWInst(AtomicRMWInst &SI); diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index f4e2d1239f0f..13c98b935adf 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -566,6 +566,13 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, return false; return true; } + case Instruction::Mul: { + const APInt *MulConst; + // We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C`) + return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) && + MulConst->isNegatedPowerOf2() && + MulConst->countTrailingZeros() == NumBits; + } } } @@ -680,6 +687,17 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, isLeftShift, IC, DL)); return PN; } + case Instruction::Mul: { + assert(!isLeftShift && "Unexpected shift direction!"); + auto *Neg = BinaryOperator::CreateNeg(I->getOperand(0)); + IC.InsertNewInstWith(Neg, *I); + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); + APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits); + auto *And = BinaryOperator::CreateAnd(Neg, + ConstantInt::get(I->getType(), Mask)); + And->takeName(I); + return IC.InsertNewInstWith(And, *I); + } } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 75520a0c8d5f..71c763de43b4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -994,6 +994,24 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { return SelectInst::Create(X, TVal, FVal); } +static Constant *constantFoldOperationIntoSelectOperand( + Instruction &I, SelectInst *SI, Value *SO) { + auto *ConstSO = dyn_cast<Constant>(SO); + if (!ConstSO) + return nullptr; + + SmallVector<Constant *> ConstOps; + for (Value *Op : I.operands()) { + if (Op == SI) + ConstOps.push_back(ConstSO); + else if (auto 
*C = dyn_cast<Constant>(Op)) + ConstOps.push_back(C); + else + llvm_unreachable("Operands should be select or constant"); + } + return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout()); +} + static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast<CastInst>(&I)) @@ -1101,8 +1119,17 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); + // Make sure that one of the select arms constant folds successfully. + Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV); + Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV); + if (!NewTV && !NewFV) + return nullptr; + + // Create an instruction for the arm that did not fold. + if (!NewTV) + NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + if (!NewFV) + NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } @@ -2774,13 +2801,14 @@ static bool isAllocSiteRemovable(Instruction *AI, continue; } - if (isFreeCall(I, &TLI) && getAllocationFamily(I, &TLI) == Family) { + if (getFreedOperand(cast<CallBase>(I), &TLI) == PI && + getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); continue; } - if (isReallocLikeFn(I, &TLI) && + if (getReallocatedOperand(cast<CallBase>(I), &TLI) == PI && getAllocationFamily(I, &TLI) == Family) { assert(Family); Users.emplace_back(I); @@ -2805,7 +2833,7 @@ static bool isAllocSiteRemovable(Instruction *AI, } Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { - assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI)); + assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); // If we have a malloc call which is only used in any amount of comparisons to 
// null and free calls, delete the calls and replace the comparisons with true @@ -3007,9 +3035,7 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, return &FI; } -Instruction *InstCombinerImpl::visitFree(CallInst &FI) { - Value *Op = FI.getArgOperand(0); - +Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Leave a marker since we can't modify the CFG here. @@ -3024,12 +3050,10 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) { // If we had free(realloc(...)) with no intervening uses, then eliminate the // realloc() entirely. - if (CallInst *CI = dyn_cast<CallInst>(Op)) { - if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) { - return eraseInstFromFunction( - *replaceInstUsesWith(*CI, CI->getOperand(0))); - } - } + CallInst *CI = dyn_cast<CallInst>(Op); + if (CI && CI->hasOneUse()) + if (Value *ReallocatedOp = getReallocatedOperand(CI, &TLI)) + return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp)); // If we optimize for code size, try to move the call to free before the null // test so that simplify cfg can remove the empty block and dead code diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4fed4bd18fb1..cf2754b1dd60 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -108,6 +108,7 @@ static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; +static const uint64_t kFreeBSDAArch64_ShadowOffset64 = 1ULL << 47; static const uint64_t kFreeBSDKasan_ShadowOffset64 = 0xdffff7c000000000; static const uint64_t 
kNetBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; @@ -523,6 +524,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kPPC64_ShadowOffset64; else if (IsSystemZ) Mapping.Offset = kSystemZ_ShadowOffset64; + else if (IsFreeBSD && IsAArch64) + Mapping.Offset = kFreeBSDAArch64_ShadowOffset64; else if (IsFreeBSD && !IsMIPS64) { if (IsKasan) Mapping.Offset = kFreeBSDKasan_ShadowOffset64; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index 57c491436b93..27107f46ed92 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -101,42 +101,6 @@ static bool runCGProfilePass( return addModuleFlags(M, Counts); } -namespace { -struct CGProfileLegacyPass final : public ModulePass { - static char ID; - CGProfileLegacyPass() : ModulePass(ID) { - initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<LazyBlockFrequencyInfoPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - } - - bool runOnModule(Module &M) override { - auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & { - return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI(); - }; - auto GetTTI = [this](Function &F) -> TargetTransformInfo & { - return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - }; - - return runCGProfilePass(M, GetBFI, GetTTI, true); - } -}; - -} // namespace - -char CGProfileLegacyPass::ID = 0; - -INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false, - false) - -ModulePass *llvm::createCGProfileLegacyPass() { - return new CGProfileLegacyPass(); -} - PreservedAnalyses CGProfilePass::run(Module &M, 
ModuleAnalysisManager &MAM) { FunctionAnalysisManager &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index e5c0705b916e..adc007dacae4 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -103,47 +103,6 @@ static void parseCHRFilterFiles() { } namespace { -class ControlHeightReductionLegacyPass : public FunctionPass { -public: - static char ID; - - ControlHeightReductionLegacyPass() : FunctionPass(ID) { - initializeControlHeightReductionLegacyPassPass( - *PassRegistry::getPassRegistry()); - parseCHRFilterFiles(); - } - - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<BlockFrequencyInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<RegionInfoPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; -} // end anonymous namespace - -char ControlHeightReductionLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) -INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) - -FunctionPass *llvm::createControlHeightReductionLegacyPass() { - return new ControlHeightReductionLegacyPass(); -} - -namespace { struct CHRStats { CHRStats() = default; @@ -2083,18 +2042,6 @@ bool CHR::run() { return Changed; } -bool 
ControlHeightReductionLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - ProfileSummaryInfo &PSI = - getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); - std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = - std::make_unique<OptimizationRemarkEmitter>(&F); - return CHR(F, BFI, DT, PSI, RI, *OwnedORE).run(); -} - namespace llvm { ControlHeightReductionPass::ControlHeightReductionPass() { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp index 2091881c29fe..d7561c193aa3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp @@ -163,42 +163,11 @@ public: } }; // End of InstrOrderFile struct - -class InstrOrderFileLegacyPass : public ModulePass { -public: - static char ID; - - InstrOrderFileLegacyPass() : ModulePass(ID) { - initializeInstrOrderFileLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; -}; - } // End anonymous namespace -bool InstrOrderFileLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - return InstrOrderFile().run(M); -} - PreservedAnalyses InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) { if (InstrOrderFile().run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - -INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) -INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile", - "Instrumentation for Order File", false, false) - -char InstrOrderFileLegacyPass::ID = 0; - -ModulePass *llvm::createInstrOrderFilePass() { - return new 
InstrOrderFileLegacyPass(); -} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 3572cb3b50e2..5b7aa304b987 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -147,35 +147,6 @@ cl::opt<bool> SkipRetExitBlock( "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); -class InstrProfilingLegacyPass : public ModulePass { - InstrProfiling InstrProf; - -public: - static char ID; - - InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false) - : ModulePass(ID), InstrProf(Options, IsCS) { - initializeInstrProfilingLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { - return "Frontend instrumentation-based coverage lowering"; - } - - bool runOnModule(Module &M) override { - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - }; - return InstrProf.run(M, GetTLI); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired<TargetLibraryInfoWrapperPass>(); - } -}; - /// /// A helper class to promote one counter RMW operation in the loop /// into register update. 
@@ -439,21 +410,6 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } -char InstrProfilingLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", - false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, - false) - -ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, - bool IsCS) { - return new InstrProfilingLegacyPass(Options, IsCS); -} - bool InstrProfiling::lowerIntrinsics(Function *F) { bool MadeChange = false; PromotionCandidates.clear(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 9ff0e632bd7f..bd575b6cf3b0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -94,11 +94,6 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemProfilerLegacyPassPass(Registry); initializeModuleMemProfilerLegacyPassPass(Registry); initializeBoundsCheckingLegacyPassPass(Registry); - initializeControlHeightReductionLegacyPassPass(Registry); - initializeCGProfileLegacyPassPass(Registry); - initializeInstrOrderFileLegacyPassPass(Registry); - initializeInstrProfilingLegacyPassPass(Registry); - initializeModuleSanitizerCoverageLegacyPassPass(Registry); initializeDataFlowSanitizerLegacyPassPass(Registry); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4d72f6c3d1a9..4606bd5de6c3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ 
b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -417,6 +417,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = { 0x01000000000, // OriginBase }; +// aarch64 FreeBSD +static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = { + 0x1800000000000, // AndMask + 0x0400000000000, // XorMask + 0x0200000000000, // ShadowBase + 0x0700000000000, // OriginBase +}; + // i386 FreeBSD static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 0x000180000000, // AndMask @@ -466,6 +474,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { &Linux_AArch64_MemoryMapParams, }; +static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = { + nullptr, + &FreeBSD_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -894,6 +907,9 @@ void MemorySanitizer::initializeModule(Module &M) { switch (TargetTriple.getOS()) { case Triple::FreeBSD: switch (TargetTriple.getArch()) { + case Triple::aarch64: + MapParams = FreeBSD_ARM_MemoryMapParams.bits64; + break; case Triple::x86_64: MapParams = FreeBSD_X86_MemoryMapParams.bits64; break; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 3a29cd70e42e..c4512d0222cd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -210,12 +210,11 @@ cl::opt<bool> // Command line option to enable/disable the warning about a hash mismatch in // the profile data for Comdat functions, which often turns out to be false // positive due to the pre-instrumentation inline. 
-static cl::opt<bool> - NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true), - cl::Hidden, - cl::desc("The option is used to turn on/off " - "warnings about hash mismatch for comdat " - "functions.")); +static cl::opt<bool> NoPGOWarnMismatchComdatWeak( + "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden, + cl::desc("The option is used to turn on/off " + "warnings about hash mismatch for comdat " + "or weak functions.")); // Command line option to enable/disable select instruction instrumentation. static cl::opt<bool> @@ -287,6 +286,11 @@ static cl::opt<unsigned> PGOVerifyBFICutoff( cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose " "profile count value is below.")); +static cl::opt<std::string> PGOTraceFuncHash( + "pgo-trace-func-hash", cl::init("-"), cl::Hidden, + cl::value_desc("function name"), + cl::desc("Trace the hash of the function with this name.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -630,6 +634,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() { << ", High32 CRC = " << JCH.getCRC()); } LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";); + + if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash)) + dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash + << " in building " << F.getParent()->getSourceFileName() << "\n"; } // Check if we can safely rename this Comdat function. 
@@ -832,8 +840,6 @@ static void instrumentOneFunc( auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()), FuncInfo.FunctionHash); if (PGOFunctionEntryCoverage) { - assert(!IsCS && - "entry coverge does not support context-sensitive instrumentation"); auto &EntryBB = F.getEntryBlock(); IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt()); // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>, @@ -1216,8 +1222,9 @@ static void annotateFunctionWithHashMismatch(Function &F, bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, bool &AllMinusOnes) { auto &Ctx = M->getContext(); - Expected<InstrProfRecord> Result = - PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); + uint64_t MismatchedFuncSum = 0; + Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord( + FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum); if (Error E = Result.takeError()) { handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); @@ -1233,10 +1240,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || - (NoPGOWarnMismatchComdat && - (F.hasComdat() || + (NoPGOWarnMismatchComdatWeak && + (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); - LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); + LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash + << " skip=" << SkipWarning << ")"); // Emit function metadata indicating PGO profile mismatch. 
annotateFunctionWithHashMismatch(F, M->getContext()); } @@ -1245,9 +1253,11 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros, if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + - std::string(" Hash = ") + - std::to_string(FuncInfo.FunctionHash); + std::string Msg = + IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) + + std::string(" up to ") + std::to_string(MismatchedFuncSum) + + std::string(" count discarded"); Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index d9d11cc90d3d..3ca476e74953 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -278,53 +278,6 @@ private: const SpecialCaseList *Allowlist; const SpecialCaseList *Blocklist; }; - -class ModuleSanitizerCoverageLegacyPass : public ModulePass { -public: - ModuleSanitizerCoverageLegacyPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(), - const std::vector<std::string> &AllowlistFiles = - std::vector<std::string>(), - const std::vector<std::string> &BlocklistFiles = - std::vector<std::string>()) - : ModulePass(ID), Options(Options) { - if (AllowlistFiles.size() > 0) - Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, - *vfs::getRealFileSystem()); - if (BlocklistFiles.size() > 0) - Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, - *vfs::getRealFileSystem()); - initializeModuleSanitizerCoverageLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(), - Blocklist.get()); - auto DTCallback 
= [this](Function &F) -> const DominatorTree * { - return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); - }; - auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { - return &this->getAnalysis<PostDominatorTreeWrapperPass>(F) - .getPostDomTree(); - }; - return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); - } - - static char ID; // Pass identification, replacement for typeid - StringRef getPassName() const override { return "ModuleSanitizerCoverage"; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<PostDominatorTreeWrapperPass>(); - } - -private: - SanitizerCoverageOptions Options; - - std::unique_ptr<SpecialCaseList> Allowlist; - std::unique_ptr<SpecialCaseList> Blocklist; -}; - } // namespace PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, @@ -1075,20 +1028,3 @@ ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const { return "\1section$end$__DATA$__" + Section; return "__stop___" + Section; } - -char ModuleSanitizerCoverageLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options, - const std::vector<std::string> &AllowlistFiles, - const std::vector<std::string> &BlocklistFiles) { - return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles, - BlocklistFiles); -} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 4c42869dbd58..3f0dad7ee769 100644 --- 
a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -776,6 +776,11 @@ struct DSEState { // fall back to CFG scan starting from all non-unreachable roots. bool AnyUnreachableExit; + // Whether or not we should iterate on removing dead stores at the end of the + // function due to removing a store causing a previously captured pointer to + // no longer be captured. + bool ShouldIterateEndOfFunctionDSE; + // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; @@ -1103,9 +1108,8 @@ struct DSEState { return {std::make_pair(MemoryLocation(Ptr, Len), false)}; if (auto *CB = dyn_cast<CallBase>(I)) { - if (isFreeCall(I, &TLI)) - return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)), - true)}; + if (Value *FreedOp = getFreedOperand(CB, &TLI)) + return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)}; } return None; @@ -1114,9 +1118,9 @@ struct DSEState { /// Returns true if \p I is a memory terminator instruction like /// llvm.lifetime.end or free. 
bool isMemTerminatorInst(Instruction *I) const { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); - return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) || - isFreeCall(I, &TLI); + auto *CB = dyn_cast<CallBase>(I); + return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end || + getFreedOperand(CB, &TLI) != nullptr); } /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from @@ -1598,6 +1602,14 @@ struct DSEState { if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) { SkipStores.insert(MD); + if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) { + if (SI->getValueOperand()->getType()->isPointerTy()) { + const Value *UO = getUnderlyingObject(SI->getValueOperand()); + if (CapturedBeforeReturn.erase(UO)) + ShouldIterateEndOfFunctionDSE = true; + InvisibleToCallerAfterRet.erase(UO); + } + } } Updater.removeMemoryAccess(MA); @@ -1671,33 +1683,36 @@ struct DSEState { LLVM_DEBUG( dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n"); - for (MemoryDef *Def : llvm::reverse(MemDefs)) { - if (SkipStores.contains(Def)) - continue; + do { + ShouldIterateEndOfFunctionDSE = false; + for (MemoryDef *Def : llvm::reverse(MemDefs)) { + if (SkipStores.contains(Def)) + continue; - Instruction *DefI = Def->getMemoryInst(); - auto DefLoc = getLocForWrite(DefI); - if (!DefLoc || !isRemovable(DefI)) - continue; + Instruction *DefI = Def->getMemoryInst(); + auto DefLoc = getLocForWrite(DefI); + if (!DefLoc || !isRemovable(DefI)) + continue; - // NOTE: Currently eliminating writes at the end of a function is limited - // to MemoryDefs with a single underlying object, to save compile-time. In - // practice it appears the case with multiple underlying objects is very - // uncommon. If it turns out to be important, we can use - // getUnderlyingObjects here instead. 
- const Value *UO = getUnderlyingObject(DefLoc->Ptr); - if (!isInvisibleToCallerAfterRet(UO)) - continue; + // NOTE: Currently eliminating writes at the end of a function is + // limited to MemoryDefs with a single underlying object, to save + // compile-time. In practice it appears the case with multiple + // underlying objects is very uncommon. If it turns out to be important, + // we can use getUnderlyingObjects here instead. + const Value *UO = getUnderlyingObject(DefLoc->Ptr); + if (!isInvisibleToCallerAfterRet(UO)) + continue; - if (isWriteAtEndOfFunction(Def)) { - // See through pointer-to-pointer bitcasts - LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " - "of the function\n"); - deleteDeadInstruction(DefI); - ++NumFastStores; - MadeChange = true; + if (isWriteAtEndOfFunction(Def)) { + // See through pointer-to-pointer bitcasts + LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end " + "of the function\n"); + deleteDeadInstruction(DefI); + ++NumFastStores; + MadeChange = true; + } } - } + } while (ShouldIterateEndOfFunctionDSE); return MadeChange; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index a9ca0bdc8f7b..9698ed97379e 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1738,7 +1738,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // through *explicit* control flow. We have to eliminate the possibility of // implicit exits (see below) before we know it's truly exact. 
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE)) + if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC)) return false; assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant"); @@ -1769,7 +1769,8 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { return true; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); - if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE)) + if (isa<SCEVCouldNotCompute>(ExitCount) || + !Rewriter.isSafeToExpand(ExitCount)) return true; assert(SE->isLoopInvariant(ExitCount, L) && diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index b54cf5e7cb20..328615011ceb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1451,7 +1451,7 @@ bool LoopConstrainer::run() { return false; } - if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) { + if (!Expander.isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt)) { LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the" << " preloop exit limit " << *ExitPreLoopAtSCEV << " at block " << InsertPt->getParent()->getName() @@ -1478,7 +1478,7 @@ bool LoopConstrainer::run() { return false; } - if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) { + if (!Expander.isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt)) { LLVM_DEBUG(dbgs() << "irce: could not prove that it is safe to expand the" << " main loop exit limit " << *ExitMainLoopAtSCEV << " at block " << InsertPt->getParent()->getName() diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 
9590fbbb1994..fd2eaee8b47d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -388,15 +388,15 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride)) continue; + BasicBlock *BB = P.InsertPt->getParent(); + SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr"); const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr( SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead), P.LSCEVAddRec->getStepRecurrence(*SE))); - if (!isSafeToExpand(NextLSCEV, *SE)) + if (!SCEVE.isSafeToExpand(NextLSCEV)) continue; - BasicBlock *BB = P.InsertPt->getParent(); Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), 0/*PtrAddrSpace*/); - SCEVExpander SCEVE(*SE, BB->getModule()->getDataLayout(), "prefaddr"); Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt); IRBuilder<> Builder(P.InsertPt); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d908c151d9f2..3ed022f65d9a 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1129,7 +1129,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. - if (!isSafeToExpand(Start, *SE)) + if (!Expander.isSafeToExpand(Start)) return Changed; // Okay, we have a strided store "p[i]" of a splattable value. We can turn @@ -1163,7 +1163,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. 
- if (!isSafeToExpand(NumBytesS, *SE)) + if (!Expander.isSafeToExpand(NumBytesS)) return Changed; Value *NumBytes = diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1d3023d04463..18daa4295224 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -288,7 +288,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) { Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - return; } namespace { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp index d0ee5b47a8ca..b327d38d2a84 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -275,7 +275,8 @@ class LoopPredication { /// which is that an expression *can be made* invariant via SCEVExpander. /// Thus, this version is only suitable for finding an insert point to be be /// passed to SCEVExpander! - Instruction *findInsertPt(Instruction *User, ArrayRef<const SCEV*> Ops); + Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User, + ArrayRef<const SCEV *> Ops); /// Return true if the value is known to produce a single fixed value across /// all iterations on which it executes. 
Note that this does not imply @@ -418,13 +419,14 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander, return Builder.getFalse(); } - Value *LHSV = Expander.expandCodeFor(LHS, Ty, findInsertPt(Guard, {LHS})); - Value *RHSV = Expander.expandCodeFor(RHS, Ty, findInsertPt(Guard, {RHS})); + Value *LHSV = + Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS})); + Value *RHSV = + Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS})); IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV})); return Builder.CreateICmp(Pred, LHSV, RHSV); } - // Returns true if its safe to truncate the IV to RangeCheckType. // When the IV type is wider than the range operand type, we can still do loop // predication, by generating SCEVs for the range and latch that are of the @@ -516,14 +518,15 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use, return Preheader->getTerminator(); } -Instruction *LoopPredication::findInsertPt(Instruction *Use, - ArrayRef<const SCEV*> Ops) { +Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander, + Instruction *Use, + ArrayRef<const SCEV *> Ops) { // Subtlety: SCEV considers things to be invariant if the value produced is // the same across iterations. This is not the same as being able to // evaluate outside the loop, which is what we actually need here. 
for (const SCEV *Op : Ops) if (!SE->isLoopInvariant(Op, L) || - !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE)) + !Expander.isSafeToExpandAt(Op, Preheader->getTerminator())) return Use; return Preheader->getTerminator(); } @@ -589,8 +592,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -632,8 +635,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } - if (!isSafeToExpandAt(LatchStart, Guard, *SE) || - !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + if (!Expander.isSafeToExpandAt(LatchStart, Guard) || + !Expander.isSafeToExpandAt(LatchLimit, Guard)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } @@ -1159,7 +1162,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || !SE->isLoopInvariant(MinEC, L) || - !isSafeToExpandAt(MinEC, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(MinEC, WidenableBR)) return ChangedLoop; // Subtlety: We need to avoid inserting additional uses of the WC. 
We know @@ -1198,7 +1201,7 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); if (isa<SCEVCouldNotCompute>(ExitCount) || ExitCount->getType()->isPointerTy() || - !isSafeToExpandAt(ExitCount, WidenableBR, *SE)) + !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR)) continue; const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index b7e0e32780b4..083f87436acd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -576,6 +576,18 @@ public: return false; } + // TODO: Tokens may breach LCSSA form by default. However, the transform for + // dead exit blocks requires LCSSA form to be maintained for all values, + // tokens included, otherwise it may break use-def dominance (see PR56243). + if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) { + assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) && + "LCSSA broken not by tokens?"); + LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop " + << Header->getName() + << ": tokens uses potentially break LCSSA form.\n"); + return false; + } + SE.forgetTopmostLoop(&L); // Dump analysis results. 
LLVM_DEBUG(dump()); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ef7809c6681..a3434f8bc46d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1950,6 +1950,7 @@ class LSRInstance { Loop *const L; MemorySSAUpdater *MSSAU; TTI::AddressingModeKind AMK; + mutable SCEVExpander Rewriter; bool Changed = false; /// This is the insert position that the current loop's induction variable @@ -1998,7 +1999,7 @@ class LSRInstance { SmallVectorImpl<ChainUsers> &ChainUsersVec); void FinalizeChain(IVChain &Chain); void CollectChains(); - void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, + void GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts); void CollectInterestingTypesAndFactors(); @@ -2068,22 +2069,19 @@ class LSRInstance { void Solve(SmallVectorImpl<const Formula *> &Solution) const; BasicBlock::iterator - HoistInsertPosition(BasicBlock::iterator IP, - const SmallVectorImpl<Instruction *> &Inputs) const; - BasicBlock::iterator - AdjustInsertPositionForExpand(BasicBlock::iterator IP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const; + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, - BasicBlock::iterator IP, SCEVExpander &Rewriter, + BasicBlock::iterator IP, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void Rewrite(const LSRUse &LU, const 
LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); @@ -3183,7 +3181,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. -void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, +void LSRInstance::GenerateIVChain(const IVChain &Chain, SmallVectorImpl<WeakTrackingVH> &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. @@ -3335,7 +3333,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE) && + if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) && (!NV->getType()->isPointerTy() || SE.getPointerBase(N) == SE.getPointerBase(S))) { // S is normalized, so normalize N before folding it into S @@ -3343,6 +3341,21 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); + } else if (L->isLoopInvariant(NV) && + (!isa<Instruction>(NV) || + DT.dominates(cast<Instruction>(NV), L->getHeader())) && + !NV->getType()->isPointerTy()) { + // If we can't generally expand the expression (e.g. it contains + // a divide), but it is already at a loop invariant point before the + // loop, wrap it in an unknown (to prevent the expander from trying + // to re-expand in a potentially unsafe way.) The restriction to + // integer types is required because the unknown hides the base, and + // SCEV can't compute the difference of two unknown pointers. 
+ N = SE.getUnknown(NV); + N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + assert(!isa<SCEVCouldNotCompute>(S)); } // -1 and the negations of all interesting strides (except the negation @@ -3385,10 +3398,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { /// Insert a formula for the given expression into the given use, separating out /// loop-variant portions from loop-invariant and loop-computable portions. -void -LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { +void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, + size_t LUIdx) { // Mark uses whose expressions cannot be expanded. - if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false)) + if (!Rewriter.isSafeToExpand(S)) LU.RigidFormula = true; Formula F; @@ -5206,11 +5219,8 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, /// Determine an input position which will be dominated by the operands and /// which will dominate the result. -BasicBlock::iterator -LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, - const LSRFixup &LF, - const LSRUse &LU, - SCEVExpander &Rewriter) const { +BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand( + BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const { // Collect some instructions which must be dominated by the // expanding replacement. These must be dominated by any operands that // will be required in the expansion. @@ -5273,14 +5283,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// is called "expanding"). Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; // Determine an input position which will be dominated by the operands and // which will dominate the result. 
- IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); + IP = AdjustInsertPositionForExpand(IP, LF, LU); Rewriter.setInsertPoint(&*IP); // Inform the Rewriter if we have a post-increment use, so that it can @@ -5452,7 +5461,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, /// to be expanded in multiple places. void LSRInstance::RewriteForPHI( PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, - SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { + SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { DenseMap<BasicBlock *, Value *> Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5507,8 +5516,8 @@ void LSRInstance::RewriteForPHI( if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(), - Rewriter, DeadInsts); + Value *FullV = + Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -5567,15 +5576,14 @@ void LSRInstance::RewriteForPHI( /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, SCEVExpander &Rewriter, + const Formula &F, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { - RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts); + RewriteForPHI(PN, LU, LF, F, DeadInsts); } else { - Value *FullV = - Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); + Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. 
Type *OpTy = LF.OperandValToReplace->getType(); @@ -5609,13 +5617,6 @@ void LSRInstance::ImplementSolution( // we can remove them after we are done working. SmallVector<WeakTrackingVH, 16> DeadInsts; - SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", - false); -#ifndef NDEBUG - Rewriter.setDebugType(DEBUG_TYPE); -#endif - Rewriter.disableCanonicalMode(); - Rewriter.enableLSRMode(); Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Mark phi nodes that terminate chains so the expander tries to reuse them. @@ -5627,12 +5628,12 @@ void LSRInstance::ImplementSolution( // Expand the new value definitions and update the users. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { - Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts); + Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; } for (const IVChain &Chain : IVChainVec) { - GenerateIVChain(Chain, Rewriter, DeadInsts); + GenerateIVChain(Chain, DeadInsts); Changed = true; } @@ -5697,8 +5698,10 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), - MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ? - PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)) { + MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 + ? PreferredAddresingMode + : TTI.getPreferredAddressingMode(L, &SE)), + Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr", false) { // If LoopSimplify form is not available, stay out of trouble. 
if (!L->isLoopSimplifyForm()) return; @@ -5733,6 +5736,14 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << ":\n"); + // Configure SCEVExpander already now, so the correct mode is used for + // isSafeToExpand() checks. +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif + Rewriter.disableCanonicalMode(); + Rewriter.enableLSRMode(); + // First, perform some low-level loop optimizations. OptimizeShadowIV(); OptimizeLoopTermCond(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp index 75f0896d4845..240fb5e60687 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) { isOr = true; } +/// Return true if I is an instruction with the FastMathFlags that are needed +/// for general reassociation set. This is not the same as testing +/// Instruction::isAssociative() because it includes operations like fsub. +/// (This routine is only intended to be called for floating-point operations.) +static bool hasFPAssociativeFlags(Instruction *I) { + assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops"); + return I->hasAllowReassoc() && I->hasNoSignedZeros(); +} + /// Return true if V is an instruction of the specified opcode and if it /// only has one use. 
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && I->getOpcode() == Opcode) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, auto *I = dyn_cast<Instruction>(V); if (I && I->hasOneUse() && (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) - if (!isa<FPMathOperator>(I) || I->isFast()) + if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I)) return cast<BinaryOperator>(I); return nullptr; } @@ -449,7 +458,8 @@ using RepeatedValue = std::pair<Value*, APInt>; /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. static bool LinearizeExprTree(Instruction *I, - SmallVectorImpl<RepeatedValue> &Ops) { + SmallVectorImpl<RepeatedValue> &Ops, + ReassociatePass::OrderedSet &ToRedo) { assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -572,23 +582,32 @@ static bool LinearizeExprTree(Instruction *I, assert((!isa<Instruction>(Op) || cast<Instruction>(Op)->getOpcode() != Opcode || (isa<FPMathOperator>(Op) && - !cast<Instruction>(Op)->isFast())) && + !hasFPAssociativeFlags(cast<Instruction>(Op)))) && "Should have been handled above!"); assert(Op->hasOneUse() && "Has uses outside the expression tree!"); // If this is a multiply expression, turn any internal negations into - // multiplies by -1 so they can be reassociated. 
- if (Instruction *Tmp = dyn_cast<Instruction>(Op)) - if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) || - (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) { - LLVM_DEBUG(dbgs() - << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - Tmp = LowerNegateToMultiply(Tmp); - LLVM_DEBUG(dbgs() << *Tmp << '\n'); - Worklist.push_back(std::make_pair(Tmp, Weight)); - Changed = true; - continue; + // multiplies by -1 so they can be reassociated. Add any users of the + // newly created multiplication by -1 to the redo list, so any + // reassociation opportunities that are exposed will be reassociated + // further. + Instruction *Neg; + if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) || + (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) && + match(Op, m_Instruction(Neg))) { + LLVM_DEBUG(dbgs() + << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + Instruction *Mul = LowerNegateToMultiply(Neg); + LLVM_DEBUG(dbgs() << *Mul << '\n'); + Worklist.push_back(std::make_pair(Mul, Weight)); + for (User *U : Mul->users()) { + if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U)) + ToRedo.insert(UserBO); } + ToRedo.insert(Neg); + Changed = true; + continue; + } // Failed to morph into an expression of the right type. This really is // a leaf. @@ -1141,7 +1160,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); SmallVector<ValueEntry, 8> Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2206,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) { if (Instruction *Res = canonicalizeNegFPConstants(I)) I = Res; - // Don't optimize floating-point instructions unless they are 'fast'. 
- if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) + // Don't optimize floating-point instructions unless they have the + // appropriate FastMathFlags for reassociation enabled. + if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I)) return; // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -2320,7 +2340,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector<RepeatedValue, 8> Tree; - MadeChange |= LinearizeExprTree(I, Tree); + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); SmallVector<ValueEntry, 8> Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 51e4a5773f3e..baf407c5037b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1702,10 +1702,20 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */ auto &Context = Call->getContext(); auto &DL = Call->getModule()->getDataLayout(); auto GetBaseAndOffset = [&](Value *Derived) { - assert(PointerToBase.count(Derived)); + Value *Base = nullptr; + // Optimizations in unreachable code might substitute the real pointer + // with undef, poison or null-derived constant. Return null base for + // them to be consistent with the handling in the main algorithm in + // findBaseDefiningValue. 
+ if (isa<Constant>(Derived)) + Base = + ConstantPointerNull::get(cast<PointerType>(Derived->getType())); + else { + assert(PointerToBase.count(Derived)); + Base = PointerToBase.find(Derived)->second; + } unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); - Value *Base = PointerToBase.find(Derived)->second; Value *Base_int = Builder.CreatePtrToInt( Base, Type::getIntNTy(Context, IntPtrSize)); Value *Derived_int = Builder.CreatePtrToInt( diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp index 008ddfc72740..5ab9e25577d8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -111,8 +111,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopLoadEliminationPass(Registry); initializeLoopSimplifyCFGLegacyPassPass(Registry); initializeLoopVersioningLegacyPassPass(Registry); - initializeEntryExitInstrumenterPass(Registry); - initializePostInlineEntryExitInstrumenterPass(Registry); } void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index f6525ad7de9b..0b797abefe20 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -68,11 +68,6 @@ static cl::opt<bool> cl::desc("Allow relaxed uniform region checks"), cl::init(true)); -static cl::opt<unsigned> - ReorderNodeSize("structurizecfg-node-reorder-size", - cl::desc("Limit region size for reordering nodes"), - cl::init(100), cl::Hidden); - // Definition of the complex types used in this pass. 
using BBValuePair = std::pair<BasicBlock *, Value *>; @@ -267,8 +262,6 @@ class StructurizeCFG { void orderNodes(); - void reorderNodes(); - void analyzeLoops(RegionNode *N); Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); @@ -427,57 +420,6 @@ void StructurizeCFG::orderNodes() { } } -/// Change the node ordering to decrease the range of live values, especially -/// the values that capture the control flow path for branches. We do this -/// by moving blocks with a single predecessor and successor to appear after -/// predecessor. The motivation is to move some loop exit blocks into a loop. -/// In cases where a loop has a large number of exit blocks, this reduces the -/// amount of values needed across the loop boundary. -void StructurizeCFG::reorderNodes() { - SmallVector<RegionNode *, 8> NewOrder; - DenseMap<BasicBlock *, unsigned> MoveTo; - BitVector Moved(Order.size()); - - // The benefits of reordering nodes occurs for large regions. - if (Order.size() <= ReorderNodeSize) - return; - - // The algorithm works with two passes over Order. The first pass identifies - // the blocks to move and the position to move them to. The second pass - // creates the new order based upon this information. We move blocks with - // a single predecessor and successor. If there are multiple candidates then - // maintain the original order. - BBSet Seen; - for (int I = Order.size() - 1; I >= 0; --I) { - auto *BB = Order[I]->getEntry(); - Seen.insert(BB); - auto *Pred = BB->getSinglePredecessor(); - auto *Succ = BB->getSingleSuccessor(); - // Consider only those basic blocks that have a predecessor in Order and a - // successor that exits the region. The region may contain subregions that - // have been structurized and are not included in Order. - if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() && - !MoveTo.count(Pred)) { - MoveTo[Pred] = I; - Moved.set(I); - } - } - - // If no blocks have been moved then the original order is good. 
- if (!Moved.count()) - return; - - for (size_t I = 0, E = Order.size(); I < E; ++I) { - auto *BB = Order[I]->getEntry(); - if (MoveTo.count(BB)) - NewOrder.push_back(Order[MoveTo[BB]]); - if (!Moved[I]) - NewOrder.push_back(Order[I]); - } - - Order.assign(NewOrder); -} - /// Determine the end of the loops void StructurizeCFG::analyzeLoops(RegionNode *N) { if (N->isSubRegion()) { @@ -1139,7 +1081,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { ParentRegion = R; orderNodes(); - reorderNodes(); collectInfos(); createFlow(); insertConditions(false); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 079b2fc973b9..e3cb5f359e34 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -80,7 +80,7 @@ void llvm::detachDeadBlocks( // contained within it must dominate their uses, that all uses will // eventually be removed (they are themselves dead). if (!I.use_empty()) - I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.replaceAllUsesWith(PoisonValue::get(I.getType())); BB->getInstList().pop_back(); } new UnreachableInst(BB->getContext(), BB); @@ -188,8 +188,10 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break unwinding instructions. - if (PredBB->getTerminator()->isExceptionalTerminator()) + + // Don't break unwinding instructions or terminators with other side-effects. + Instruction *PTI = PredBB->getTerminator(); + if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) return false; // Can't merge if there are multiple distinct successors. 
@@ -202,7 +204,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, BasicBlock *NewSucc = nullptr; unsigned FallThruPath; if (PredecessorWithTwoSuccessors) { - if (!(PredBB_BI = dyn_cast<BranchInst>(PredBB->getTerminator()))) + if (!(PredBB_BI = dyn_cast<BranchInst>(PTI))) return false; BranchInst *BB_JmpI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BB_JmpI || !BB_JmpI->isUnconditional()) @@ -256,7 +258,6 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Delete, PredBB, BB}); } - Instruction *PTI = PredBB->getTerminator(); Instruction *STI = BB->getTerminator(); Instruction *Start = &*BB->begin(); // If there's nothing to move, mark the starting instruction as the last @@ -1141,7 +1142,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, if (Preds.empty()) { // Insert dummy values as the incoming value. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) - cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); + cast<PHINode>(I)->addIncoming(PoisonValue::get(I->getType()), NewBB); } // Update DominatorTree, LoopInfo, and LCCSA analysis information. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index c4a58f36c171..e25ec74a0572 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -270,9 +270,6 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, bool Changed = false; - if(!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) - Changed |= setDoesNotFreeMemory(F); - if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) Changed |= setNonLazyBind(F); @@ -285,14 +282,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strchr: case LibFunc_strrchr: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_strtol: case LibFunc_strtod: case LibFunc_strtof: @@ -304,7 +301,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_strcat: case LibFunc_strncat: Changed |= setOnlyAccessesArgMemory(F); @@ -315,7 +312,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strcpy: case LibFunc_strncpy: Changed |= setReturnedArg(F, 0); @@ -330,14 +327,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 1); Changed |= setDoesNotAlias(F, 0); Changed |= setDoesNotAlias(F, 1); - return Changed; + break; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); 
Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 case LibFunc_strncmp: // 0,1 @@ -348,7 +345,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strcoll: case LibFunc_strcasecmp: // 0,1 case LibFunc_strncasecmp: // @@ -359,7 +356,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyAccessesArgMemory(F); @@ -367,26 +364,26 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -398,7 +395,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat: case LibFunc_statvfs: Changed |= setRetAndArgsNoUndef(F); @@ -406,7 +403,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= 
setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -414,7 +411,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_sprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -423,7 +420,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_snprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -432,7 +429,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_setitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -440,13 +437,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_aligned_alloc: Changed |= setAlignedAllocParam(F, 0); Changed |= setAllocSize(F, 1, None); @@ -464,7 +461,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_memcmp: Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); @@ -472,21 +469,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memchr: case LibFunc_memrchr: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_modf: case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memcpy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -497,7 +494,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_memmove: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -506,7 +503,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_mempcpy: case LibFunc_memccpy: Changed |= setWillReturn(F); @@ -519,7 +516,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + 
break; case LibFunc_memalign: Changed |= setAllocFamily(F, "malloc"); Changed |= setAllocKind(F, AllocFnKind::Alloc | AllocFnKind::Aligned | @@ -531,19 +528,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_mkdir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_mktime: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_realloc: case LibFunc_reallocf: case LibFunc_vec_realloc: @@ -559,17 +556,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setArgNoUndef(F, 1); - return Changed; + break; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_rewind: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: @@ -577,7 +574,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_rename: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -585,20 +582,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_readlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -607,7 +604,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyWritesMemory(F, 1); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); @@ -615,14 +612,14 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_bzero: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyWritesMemory(F, 0); - return Changed; + break; case LibFunc_calloc: case LibFunc_vec_calloc: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_calloc ? 
"vec_malloc" @@ -634,21 +631,21 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_chmod: case LibFunc_chown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_atoi: case LibFunc_atol: case LibFunc_atof: @@ -657,13 +654,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyReadsMemory(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_access: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -672,19 +669,19 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fdopen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_feof: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_free: case LibFunc_vec_free: Changed |= setAllocFamily(F, TheLibFunc == LibFunc_vec_free ? 
"vec_malloc" @@ -696,7 +693,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_fseek: case LibFunc_ftell: case LibFunc_fgetc: @@ -713,45 +710,45 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_ferror: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); - return Changed; + break; case LibFunc_fputc: case LibFunc_fputc_unlocked: case LibFunc_fstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_frexp: case LibFunc_frexpf: case LibFunc_frexpl: Changed |= setDoesNotThrow(F); Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fstatvfs: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_fgets: case LibFunc_fgets_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 2); - return Changed; + break; case LibFunc_fread: case LibFunc_fread_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_fwrite: case LibFunc_fwrite_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -759,7 +756,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? 
- return Changed; + break; case LibFunc_fputs: case LibFunc_fputs_unlocked: Changed |= setRetAndArgsNoUndef(F); @@ -767,7 +764,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setRetAndArgsNoUndef(F); @@ -775,73 +772,73 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fgetpos: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getlogin_r: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_getenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_gets: case LibFunc_getchar: case LibFunc_getchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_getitimer: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_getpwnam: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_ungetc: Changed |= 
setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_uname: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_unlink: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_unsetenv: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_utime: case LibFunc_utimes: Changed |= setRetAndArgsNoUndef(F); @@ -850,13 +847,13 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putc: case LibFunc_putc_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: @@ -864,23 +861,23 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. 
Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_putchar: case LibFunc_putchar_unlocked: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_popen: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -889,18 +886,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_pclose: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_vscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vsscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -908,20 +905,20 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vfscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vprintf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setRetAndArgsNoUndef(F); @@ -929,63 +926,63 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_vsnprintf: Changed |= setRetAndArgsNoUndef(F); Changed 
|= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 2); Changed |= setOnlyReadsMemory(F, 2); - return Changed; + break; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_opendir: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_tmpfile: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_times: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_htonl: case LibFunc_htons: case LibFunc_ntohl: case LibFunc_ntohs: Changed |= setDoesNotThrow(F); Changed |= setDoesNotAccessMemory(F); - return Changed; + break; case LibFunc_lstat: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_lchown: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_qsort: // May throw; places call through function pointer. 
// Cannot give undef pointer/size Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 3); - return Changed; + break; case LibFunc_dunder_strndup: Changed |= setArgNoUndef(F, 1); LLVM_FALLTHROUGH; @@ -995,28 +992,28 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setWillReturn(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_under_IO_getc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_under_IO_putc: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_dunder_isoc99_scanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: @@ -1025,7 +1022,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_dunder_isoc99_sscanf: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1033,7 +1030,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case LibFunc_fopen64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); @@ -1042,30 +1039,30 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - return Changed; + break; case 
LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); - return Changed; + break; case LibFunc_tmpfile64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setRetDoesNotAlias(F); - return Changed; + break; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F, 0); - return Changed; + break; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's @@ -1074,7 +1071,7 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotThrow(F); Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - return Changed; + break; case LibFunc_memset_pattern4: case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: @@ -1089,18 +1086,18 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotThrow(F); - return Changed; + break; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: Changed |= setRetAndArgsNoUndef(F); Changed |= setDoesNotAccessMemory(F); Changed |= setDoesNotThrow(F); - return Changed; + break; case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: Changed |= setWillReturn(F); - return Changed; + break; case LibFunc_abs: case LibFunc_acos: case LibFunc_acosf: @@ -1227,12 +1224,17 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, Changed |= setDoesNotFreeMemory(F); Changed |= setOnlyWritesMemory(F); Changed |= setWillReturn(F); - return Changed; + break; default: // FIXME: 
It'd be really nice to cover all the library functions we're // aware of here. - return false; + break; } + // We have to do this step after AllocKind has been inferred on functions so + // we can reliably identify free-like and realloc-like functions. + if (!isLibFreeFunction(&F, TheLibFunc) && !isReallocLikeFn(&F, &TLI)) + Changed |= setDoesNotFreeMemory(F); + return Changed; } static void setArgExtAttr(Function &F, unsigned ArgNo, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp index f229d4bf14e9..9101a1e41f7b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp @@ -65,23 +65,6 @@ static bool canonicalizeAliases(Module &M) { canonicalizeAlias(&GA, Changed); return Changed; } - -// Legacy pass that canonicalizes aliases. -class CanonicalizeAliasesLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Canonicalize Aliases"; } - - explicit CanonicalizeAliasesLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return canonicalizeAliases(M); } -}; -char CanonicalizeAliasesLegacyPass::ID = 0; - } // anonymous namespace PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, @@ -91,14 +74,3 @@ PreservedAnalyses CanonicalizeAliasesPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) -INITIALIZE_PASS_END(CanonicalizeAliasesLegacyPass, "canonicalize-aliases", - "Canonicalize aliases", false, false) - -namespace llvm { -ModulePass *createCanonicalizeAliasesPass() { - return new CanonicalizeAliasesLegacyPass(); -} -} // namespace llvm diff --git 
a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp index 8f053cd56e0e..1d348213bfdb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -206,9 +206,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, }; // Avoid cloning types, compile units, and (other) subprograms. - for (DISubprogram *ISP : DIFinder->subprograms()) - if (ISP != SPClonedWithinModule) + SmallPtrSet<const DISubprogram *, 16> MappedToSelfSPs; + for (DISubprogram *ISP : DIFinder->subprograms()) { + if (ISP != SPClonedWithinModule) { mapToSelfIfNew(ISP); + MappedToSelfSPs.insert(ISP); + } + } + + // If a subprogram isn't going to be cloned skip its lexical blocks as well. + for (DIScope *S : DIFinder->scopes()) { + auto *LScope = dyn_cast<DILocalScope>(S); + if (LScope && MappedToSelfSPs.count(LScope->getSubprogram())) + mapToSelfIfNew(S); + } for (DICompileUnit *CU : DIFinder->compile_units()) mapToSelfIfNew(CU); @@ -723,14 +734,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } // If the loops above have made these phi nodes have 0 or 1 operand, - // replace them with undef or the input value. We must do this for + // replace them with poison or the input value. We must do this for // correctness, because 0-operand phis are not valid. 
PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { - Value *NV = UndefValue::get(PN->getType()); + Value *NV = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(VMap[&*OldI] == PN && "VMap mismatch"); VMap[&*OldI] = NV; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp index e3e8f63383df..60f910bceab8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -117,65 +117,6 @@ static bool runOnFunction(Function &F, bool PostInlining) { return Changed; } -namespace { -struct EntryExitInstrumenter : public FunctionPass { - static char ID; - EntryExitInstrumenter() : FunctionPass(ID) { - initializeEntryExitInstrumenterPass(*PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, false); } -}; -char EntryExitInstrumenter::ID = 0; - -struct PostInlineEntryExitInstrumenter : public FunctionPass { - static char ID; - PostInlineEntryExitInstrumenter() : FunctionPass(ID) { - initializePostInlineEntryExitInstrumenterPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - } - bool runOnFunction(Function &F) override { return ::runOnFunction(F, true); } -}; -char PostInlineEntryExitInstrumenter::ID = 0; -} - -INITIALIZE_PASS_BEGIN( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - EntryExitInstrumenter, "ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() (pre inlining)", - false, false) - -INITIALIZE_PASS_BEGIN( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - PostInlineEntryExitInstrumenter, "post-inline-ee-instrument", - "Instrument function entry/exit with calls to e.g. mcount() " - "(post inlining)", - false, false) - -FunctionPass *llvm::createEntryExitInstrumenterPass() { - return new EntryExitInstrumenter(); -} - -FunctionPass *llvm::createPostInlineEntryExitInstrumenterPass() { - return new PostInlineEntryExitInstrumenter(); -} - PreservedAnalyses llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) { runOnFunction(F, PostInlining); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp index 7b8d8553bac2..7509fde6df9d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Evaluator.cpp @@ -301,9 +301,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { - if (!SI->isSimple()) { - LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); - return false; // no volatile/atomic accesses. + if (SI->isVolatile()) { + LLVM_DEBUG(dbgs() << "Store is volatile! Can not evaluate.\n"); + return false; // no volatile accesses. 
} Constant *Ptr = getVal(SI->getOperand(1)); Constant *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI); @@ -337,10 +337,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB, if (!Res.first->second.write(Val, Offset, DL)) return false; } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { - if (!LI->isSimple()) { + if (LI->isVolatile()) { LLVM_DEBUG( - dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); - return false; // no volatile/atomic accesses. + dbgs() << "Found a Load! Volatile load, can not evaluate.\n"); + return false; // no volatile accesses. } Constant *Ptr = getVal(LI->getOperand(0)); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp index 193806d9cc87..8e6d4626c9fd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -35,6 +35,13 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition( bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal( const GlobalValue *SGV, ValueInfo VI) { assert(SGV->hasLocalLinkage()); + + // Ifuncs and ifunc alias does not have summary. + if (isa<GlobalIFunc>(SGV) || + (isa<GlobalAlias>(SGV) && + isa<GlobalIFunc>(cast<GlobalAlias>(SGV)->getAliaseeObject()))) + return false; + // Both the imported references and the original local variable must // be promoted. 
if (!isPerformingImport() && !isModuleExporting()) diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index 2fb00f95b749..00387ec426bf 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2194,9 +2194,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); - // Calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (MarkNoUnwind) + // Call sites inlined through a 'nounwind' call site should be + // 'nounwind' as well. However, avoid marking call sites explicitly + // where possible. This helps expose more opportunities for CSE after + // inlining, commonly when the callee is an intrinsic. + if (MarkNoUnwind && !CI->doesNotThrow()) CI->setDoesNotThrow(); } } @@ -2625,7 +2627,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. - CB.replaceAllUsesWith(UndefValue::get(CB.getType())); + CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it. 
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index b203259db1c6..2f1d0c2f9012 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -439,6 +439,10 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, return true; } + if (auto *CB = dyn_cast<CallBase>(I)) + if (isRemovableAlloc(CB, TLI)) + return true; + if (!I->willReturn()) return false; @@ -489,16 +493,13 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I, } } - if (isAllocationFn(I, TLI) && isAllocRemovable(cast<CallBase>(I), TLI)) - return true; - - if (CallInst *CI = isFreeCall(I, TLI)) - if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) - return C->isNullValue() || isa<UndefValue>(C); - - if (auto *Call = dyn_cast<CallBase>(I)) + if (auto *Call = dyn_cast<CallBase>(I)) { + if (Value *FreedOp = getFreedOperand(Call, TLI)) + if (Constant *C = dyn_cast<Constant>(FreedOp)) + return C->isNullValue() || isa<UndefValue>(C); if (isMathLibCallNoop(Call, TLI)) return true; + } // Non-volatile atomic loads from constants can be removed. if (auto *LI = dyn_cast<LoadInst>(I)) @@ -637,7 +638,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, // won't prove fruitful. if (!Visited.insert(I).second) { // Break the cycle and delete the instruction and its operands. - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI, MSSAU); return true; } @@ -750,8 +751,8 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); - // Replace self referencing PHI with undef, it must be dead. 
- if (NewVal == PN) NewVal = UndefValue::get(PN->getType()); + // Replace self referencing PHI with poison, it must be dead. + if (NewVal == PN) NewVal = PoisonValue::get(PN->getType()); PN->replaceAllUsesWith(NewVal); PN->eraseFromParent(); } @@ -2105,7 +2106,7 @@ llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { // Delete the next to last instruction. Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->replaceAllUsesWith(PoisonValue::get(Inst->getType())); if (Inst->isEHPad() || Inst->getType()->isTokenTy()) { EndInst = Inst; continue; @@ -2144,7 +2145,7 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool PreserveLCSSA, BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 0f33559c7e70..597c88ad13df 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -622,7 +622,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // We only need to split loop exit edges. 
Loop *PredLoop = LI->getLoopFor(ExitPred); if (!PredLoop || PredLoop->contains(Exit) || - ExitPred->getTerminator()->isIndirectTerminator()) + isa<IndirectBrInst>(ExitPred->getTerminator())) continue; SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 55d5c733733b..2ff8a3f7b228 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -127,7 +127,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, // If the loop is branched to from an indirect terminator, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; // Keep track of it. @@ -256,7 +256,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (PN->getIncomingValue(i) != PN || !L->contains(PN->getIncomingBlock(i))) { // We can't split indirect control flow edges. - if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } @@ -375,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, std::vector<BasicBlock*> BackedgeBlocks; for (BasicBlock *P : predecessors(Header)) { // Indirect edges cannot be split, so we must fail if we find one. 
- if (P->getTerminator()->isIndirectTerminator()) + if (isa<IndirectBrInst>(P->getTerminator())) return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp index 82f993b4ceab..349063dd5e89 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -602,10 +602,10 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, // loop will be already eliminated and we have less work to do but according // to API doc of User::dropAllReferences only valid operation after dropping // references, is deletion. So let's substitute all usages of - // instruction from the loop with undef value of corresponding type first. + // instruction from the loop with poison value of corresponding type first. for (auto *Block : L->blocks()) for (Instruction &I : *Block) { - auto *Undef = UndefValue::get(I.getType()); + auto *Poison = PoisonValue::get(I.getType()); for (Use &U : llvm::make_early_inc_range(I.uses())) { if (auto *Usr = dyn_cast<Instruction>(U.getUser())) if (L->contains(Usr->getParent())) @@ -615,7 +615,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, if (DT) assert(!DT->isReachableFromEntry(U) && "Unexpected user in reachable block"); - U.set(Undef); + U.set(Poison); } auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I); if (!DVI) @@ -1357,7 +1357,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) { + !Rewriter.isSafeToExpand(ExitValue)) { // TODO: This should probably be sunk into SCEV in some way; maybe a // getSCEVForExit(SCEV*, L, ExitingBB)? 
It can be generalized for // most SCEV expressions and other recurrence types (e.g. shift @@ -1370,7 +1370,7 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); if (isa<SCEVCouldNotCompute>(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) + !Rewriter.isSafeToExpand(ExitValue)) continue; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerAtomic.cpp index 9914a5ca6c5e..2247b8107739 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerAtomic.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LowerAtomic.cpp @@ -31,7 +31,7 @@ bool llvm::lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(PoisonValue::get(CXI->getType()), Orig, 0); Res = Builder.CreateInsertValue(Res, Equal, 1); CXI->replaceAllUsesWith(Res); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp index deaee467531d..d4ab4504064f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp @@ -81,26 +81,6 @@ bool llvm::nameUnamedGlobals(Module &M) { return Changed; } -namespace { - -// Legacy pass that provides a name to every anon globals. 
-class NameAnonGlobalLegacyPass : public ModulePass { - -public: - /// Pass identification, replacement for typeid - static char ID; - - /// Specify pass name for debug output - StringRef getPassName() const override { return "Name Anon Globals"; } - - explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {} - - bool runOnModule(Module &M) override { return nameUnamedGlobals(M); } -}; -char NameAnonGlobalLegacyPass::ID = 0; - -} // anonymous namespace - PreservedAnalyses NameAnonGlobalPass::run(Module &M, ModuleAnalysisManager &AM) { if (!nameUnamedGlobals(M)) @@ -108,14 +88,3 @@ PreservedAnalyses NameAnonGlobalPass::run(Module &M, return PreservedAnalyses::none(); } - -INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) -INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals", - "Provide a name to nameless globals", false, false) - -namespace llvm { -ModulePass *createNameAnonGlobalPass() { - return new NameAnonGlobalLegacyPass(); -} -} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 0c8bf3827256..372cd74ea01d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2568,9 +2568,7 @@ namespace { // only needed when the expression includes some subexpression that is not IV // derived. // -// Currently, we only allow division by a nonzero constant here. If this is -// inadequate, we could easily allow division by SCEVUnknown by using -// ValueTracking to check isKnownNonZero(). +// Currently, we only allow division by a value provably non-zero here. // // We cannot generally expand recurrences unless the step dominates the loop // header. 
The expander handles the special case of affine recurrences by @@ -2588,8 +2586,7 @@ struct SCEVFindUnsafe { bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { - const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); - if (!SC || SC->getValue()->isZero()) { + if (!SE.isKnownNonZero(D->getRHS())) { IsUnsafe = true; return false; } @@ -2613,18 +2610,17 @@ struct SCEVFindUnsafe { } bool isDone() const { return IsUnsafe; } }; -} +} // namespace -namespace llvm { -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) { +bool SCEVExpander::isSafeToExpand(const SCEV *S) const { SCEVFindUnsafe Search(SE, CanonicalMode); visitAll(S, Search); return !Search.IsUnsafe; } -bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE) { - if (!isSafeToExpand(S, SE)) +bool SCEVExpander::isSafeToExpandAt(const SCEV *S, + const Instruction *InsertionPoint) const { + if (!isSafeToExpand(S)) return false; // We have to prove that the expanded site of S dominates InsertionPoint. // This is easy when not in the same block, but hard when S is an instruction @@ -2674,4 +2670,3 @@ void SCEVExpanderCleaner::cleanup() { I->eraseFromParent(); } } -} diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 4b5ade99767b..1806081678a8 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4851,7 +4851,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { PN.moveBefore(InsertPt); // Also, add a dummy incoming value for the original BB itself, // so that the PHI is well-formed until we drop said predecessor. 
- PN.addIncoming(UndefValue::get(PN.getType()), BB); + PN.addIncoming(PoisonValue::get(PN.getType()), BB); } } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index af15e0c31b75..0ab79a32f526 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -593,7 +593,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { } // Trunc no longer needed. - TI->replaceAllUsesWith(UndefValue::get(TI->getType())); + TI->replaceAllUsesWith(PoisonValue::get(TI->getType())); DeadInsts.emplace_back(TI); return true; } @@ -660,7 +660,7 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { auto *IP = GetLoopInvariantInsertPosition(L, I); - if (!isSafeToExpandAt(S, IP, *SE)) { + if (!Rewriter.isSafeToExpandAt(S, IP)) { LLVM_DEBUG(dbgs() << "INDVARS: Can not replace IV user: " << *I << " with non-speculable loop invariant: " << *S << '\n'); return false; @@ -679,20 +679,30 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { /// Eliminate redundant type cast between integer and float. bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { - if (UseInst->getOpcode() != CastInst::SIToFP) + if (UseInst->getOpcode() != CastInst::SIToFP && + UseInst->getOpcode() != CastInst::UIToFP) return false; Value *IVOperand = UseInst->getOperand(0); // Get the symbolic expression for this instruction. 
- ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand)); + const SCEV *IV = SE->getSCEV(IVOperand); + unsigned MaskBits; + if (UseInst->getOpcode() == CastInst::SIToFP) + MaskBits = SE->getSignedRange(IV).getMinSignedBits(); + else + MaskBits = SE->getUnsignedRange(IV).getActiveBits(); unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); - if (IVRange.getActiveBits() <= DestNumSigBits) { + if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { - // Match for fptosi of sitofp and with same type. - auto *CI = dyn_cast<FPToSIInst>(U); + // Match for fptosi/fptoui of sitofp and with same type. + auto *CI = dyn_cast<CastInst>(U); if (!CI || IVOperand->getType() != CI->getType()) continue; + CastInst::CastOps Opcode = CI->getOpcode(); + if (Opcode != CastInst::FPToSI && Opcode != CastInst::FPToUI) + continue; + CI->replaceAllUsesWith(IVOperand); DeadInsts.push_back(CI); LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI @@ -1015,7 +1025,7 @@ class WidenIV { SmallPtrSet<Instruction *,16> Widened; - enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + enum class ExtendKind { Zero, Sign, Unknown }; // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. @@ -1172,7 +1182,7 @@ WidenIV::WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, HasGuards(HasGuards), UsePostIncrementRanges(UsePostIncrementRanges), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); - ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; + ExtendKindMap[OrigPhi] = WI.IsSigned ? ExtendKind::Sign : ExtendKind::Zero; } Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, @@ -1225,7 +1235,7 @@ Instruction *WidenIV::cloneBitwiseIVUser(WidenIV::NarrowIVDefUse DU) { // about the narrow operand yet so must insert a [sz]ext. It is probably loop // invariant and will be folded or hoisted. 
If it actually comes from a // widened IV, it should be removed during a future call to widenIVUse. - bool IsSigned = getExtendKind(NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(NarrowDef) == ExtendKind::Sign; Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : createExtendInst(NarrowUse->getOperand(0), WideType, @@ -1290,7 +1300,7 @@ Instruction *WidenIV::cloneArithmeticIVUser(WidenIV::NarrowIVDefUse DU, return WideUse == WideAR; }; - bool SignExtend = getExtendKind(NarrowDef) == SignExtended; + bool SignExtend = getExtendKind(NarrowDef) == ExtendKind::Sign; if (!GuessNonIVOperand(SignExtend)) { SignExtend = !SignExtend; if (!GuessNonIVOperand(SignExtend)) @@ -1350,7 +1360,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { // Only Add/Sub/Mul instructions supported yet. if (OpCode != Instruction::Add && OpCode != Instruction::Sub && OpCode != Instruction::Mul) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. @@ -1362,14 +1372,14 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(DU.NarrowUse); ExtendKind ExtKind = getExtendKind(DU.NarrowDef); - if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) + if (ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap()) ExtendOperExpr = SE->getSignExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); - else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) + else if (ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap()) ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; // When creating this SCEV expr, don't apply the current operations NSW or NUW // flags. 
This instruction may be guarded by control flow that the no-wrap @@ -1387,7 +1397,7 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1396,17 +1406,17 @@ WidenIV::getExtendedOperandRecurrence(WidenIV::NarrowIVDefUse DU) { /// widening it's type? In other words, can the extend be safely hoisted out of /// the loop with SCEV reducing the value to a recurrence on the same loop. If /// so, return the extended recurrence and the kind of extension used. Otherwise -/// return {nullptr, Unknown}. +/// return {nullptr, ExtendKind::Unknown}. WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (!DU.NarrowUse->getType()->isIntegerTy()) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. 
- return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; } const SCEV *WideExpr; @@ -1414,21 +1424,21 @@ WidenIV::WidenedRecTy WidenIV::getWideRecurrence(WidenIV::NarrowIVDefUse DU) { if (DU.NeverNegative) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); if (isa<SCEVAddRecExpr>(WideExpr)) - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } - } else if (getExtendKind(DU.NarrowDef) == SignExtended) { + } else if (getExtendKind(DU.NarrowDef) == ExtendKind::Sign) { WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); - ExtKind = SignExtended; + ExtKind = ExtendKind::Sign; } else { WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); - ExtKind = ZeroExtended; + ExtKind = ExtendKind::Zero; } const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return {nullptr, Unknown}; + return {nullptr, ExtendKind::Unknown}; return {AddRec, ExtKind}; } @@ -1468,7 +1478,7 @@ bool WidenIV::widenLoopCompare(WidenIV::NarrowIVDefUse DU) { // // (A) == icmp slt i32 sext(%narrow), sext(%val) // == icmp slt i32 zext(%narrow), sext(%val) - bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; + bool IsSigned = getExtendKind(DU.NarrowDef) == ExtendKind::Sign; if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; @@ -1533,8 +1543,8 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { const OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(NarrowUse); ExtendKind ExtKind = getExtendKind(NarrowDef); - bool CanSignExtend = ExtKind == SignExtended && OBO->hasNoSignedWrap(); - bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap(); + bool CanSignExtend = ExtKind == ExtendKind::Sign && OBO->hasNoSignedWrap(); + bool CanZeroExtend = ExtKind == ExtendKind::Zero && OBO->hasNoUnsignedWrap(); auto AnotherOpExtKind = ExtKind; // Check 
that all uses are either: @@ -1564,14 +1574,14 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // predicates. For equality, it's legal to widen icmp for either sign and // zero extend. For sign extend, we can also do so for signed predicates, // likeweise for zero extend we can widen icmp for unsigned predicates. - if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred)) + if (ExtKind == ExtendKind::Zero && ICmpInst::isSigned(Pred)) return false; - if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred)) + if (ExtKind == ExtendKind::Sign && ICmpInst::isUnsigned(Pred)) return false; ICmpUsers.push_back(ICmp); continue; } - if (ExtKind == SignExtended) + if (ExtKind == ExtendKind::Sign) User = dyn_cast<SExtInst>(User); else User = dyn_cast<ZExtInst>(User); @@ -1594,7 +1604,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { // will most likely not see it. Let's try to prove it. if (OpCode != Instruction::Add) return false; - if (ExtKind != ZeroExtended) + if (ExtKind != ExtendKind::Zero) return false; const SCEV *LHS = SE->getSCEV(OBO->getOperand(0)); const SCEV *RHS = SE->getSCEV(OBO->getOperand(1)); @@ -1609,7 +1619,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { return false; // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand as // neg(zext(neg(op))), which is basically sext(op). - AnotherOpExtKind = SignExtended; + AnotherOpExtKind = ExtendKind::Sign; } // Verifying that Defining operand is an AddRec @@ -1621,14 +1631,16 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); // Generating a widening use instruction. - Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) - ? WideDef - : createExtendInst(NarrowUse->getOperand(0), WideType, - AnotherOpExtKind, NarrowUse); - Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) - ? 
WideDef - : createExtendInst(NarrowUse->getOperand(1), WideType, - AnotherOpExtKind, NarrowUse); + Value *LHS = + (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); + Value *RHS = + (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + AnotherOpExtKind == ExtendKind::Sign, NarrowUse); auto *NarrowBO = cast<BinaryOperator>(NarrowUse); auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, @@ -1667,7 +1679,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { auto ExtendedOp = [&](Value * V)->Value * { if (V == NarrowUse) return WideBO; - if (ExtKind == ZeroExtended) + if (ExtKind == ExtendKind::Zero) return Builder.CreateZExt(V, WideBO->getType()); else return Builder.CreateSExt(V, WideBO->getType()); @@ -1723,10 +1735,10 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri // This narrow use can be widened by a sext if it's non-negative or its narrow // def was widended by a sext. Same for zext. auto canWidenBySExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Sign; }; auto canWidenByZExt = [&]() { - return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; + return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ExtendKind::Zero; }; // Our raison d'etre! Eliminate sign and zero extension. 
@@ -1774,7 +1786,8 @@ Instruction *WidenIV::widenIVUse(WidenIV::NarrowIVDefUse DU, SCEVExpander &Rewri if (!WideAddRec.first) WideAddRec = getWideRecurrence(DU); - assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); + assert((WideAddRec.first == nullptr) == + (WideAddRec.second == ExtendKind::Unknown)); if (!WideAddRec.first) { // If use is a loop condition, try to promote the condition instead of // truncating the IV first. @@ -1869,7 +1882,7 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { return nullptr; // Widen the induction variable expression. - const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended + const SCEV *WideIVExpr = getExtendKind(OrigPhi) == ExtendKind::Sign ? SE->getSignExtendExpr(AddRec, WideType) : SE->getZeroExtendExpr(AddRec, WideType); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index b359717424a6..bca3b0538c5d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1948,14 +1948,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { if (Value *Sqrt = replacePowWithSqrt(Pow, B)) return Sqrt; + // If we can approximate pow: // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction + // pow(x, n) -> powi(x, n) if n is a constant signed integer value const APFloat *ExpoF; - if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) && - !ExpoF->isExactlyValue(-0.5)) { + if (AllowApprox && match(Expo, m_APFloat(ExpoF)) && + !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) { APFloat ExpoA(abs(*ExpoF)); APFloat ExpoI(*ExpoF); Value *Sqrt = nullptr; - if (AllowApprox && !ExpoA.isInteger()) { + if (!ExpoA.isInteger()) { APFloat Expo2 = ExpoA; // To check if ExpoA is an integer + 0.5, we add it to itself. 
If there // is no floating point exception and the result is an integer, then @@ -1979,7 +1981,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return nullptr; } - // pow(x, n) -> powi(x, n) if n is a constant signed integer value + // 0.5 fraction is now optionally handled. + // Do pow -> powi for remaining integer exponent APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false); if (ExpoF->isInteger() && ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) == diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 832353741500..9bbfe06b9abb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -145,8 +145,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { // locate the exit blocks. SetVector<BasicBlock *> ExitingBlocks; SetVector<BasicBlock *> Exits; - // Record the exit blocks that branch to the same block. - MapVector<BasicBlock *, SetVector<BasicBlock *> > CommonSuccs; // We need SetVectors, but the Loop API takes a vector, so we use a temporary. SmallVector<BasicBlock *, 8> Temp; @@ -160,11 +158,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (SL == L || L->contains(SL)) continue; Exits.insert(S); - // The typical case for reducing the number of guard blocks occurs when - // the exit block has a single predecessor and successor. 
- if (S->getSinglePredecessor()) - if (auto *Succ = S->getSingleSuccessor()) - CommonSuccs[Succ].insert(S); } } @@ -179,39 +172,13 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { for (auto EB : ExitingBlocks) { dbgs() << " " << EB->getName(); } - dbgs() << "\n"; - - dbgs() << "Exit blocks with a common successor:\n"; - for (auto CS : CommonSuccs) { - dbgs() << " Succ " << CS.first->getName() << ", exits:"; - for (auto Exit : CS.second) - dbgs() << " " << Exit->getName(); - dbgs() << "\n"; - }); + dbgs() << "\n";); if (Exits.size() <= 1) { LLVM_DEBUG(dbgs() << "loop does not have multiple exits; nothing to do\n"); return false; } - // When multiple exit blocks branch to the same block, change the control - // flow hub to after the exit blocks rather than before. This reduces the - // number of guard blocks needed after the loop. - for (auto CS : CommonSuccs) { - auto CB = CS.first; - auto Preds = CS.second; - if (Exits.contains(CB)) - continue; - if (Preds.size() < 2 || Preds.size() == Exits.size()) - continue; - for (auto Exit : Preds) { - Exits.remove(Exit); - ExitingBlocks.remove(Exit->getSinglePredecessor()); - ExitingBlocks.insert(Exit); - } - Exits.insert(CB); - } - SmallVector<BasicBlock *, 8> GuardBlocks; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); auto LoopExitBlock = CreateControlFlowHub(&DTU, GuardBlocks, ExitingBlocks, @@ -231,17 +198,6 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) { if (auto ParentLoop = L->getParentLoop()) { for (auto G : GuardBlocks) { ParentLoop->addBasicBlockToLoop(G, LI); - // Ensure the guard block predecessors are in a valid loop. After the - // change to the control flow hub for common successors, a guard block - // predecessor may not be in a loop or may be in an outer loop. 
- for (auto Pred : predecessors(G)) { - auto PredLoop = LI.getLoopFor(Pred); - if (!ParentLoop->contains(PredLoop)) { - if (PredLoop) - LI.removeBlock(Pred); - ParentLoop->addBasicBlockToLoop(Pred, LI); - } - } } ParentLoop->verifyLoop(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp index f34f2df971b1..d002922cfd30 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Utils.cpp @@ -28,7 +28,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeAssumeSimplifyPassLegacyPassPass(Registry); initializeAssumeBuilderPassLegacyPassPass(Registry); initializeBreakCriticalEdgesPass(Registry); - initializeCanonicalizeAliasesLegacyPassPass(Registry); initializeCanonicalizeFreezeInLoopsPass(Registry); initializeInstNamerPass(Registry); initializeLCSSAWrapperPassPass(Registry); @@ -37,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerInvokeLegacyPassPass(Registry); initializeLowerSwitchLegacyPassPass(Registry); - initializeNameAnonGlobalLegacyPassPass(Registry); initializePromoteLegacyPassPass(Registry); initializeStripNonLineTableDebugLegacyPassPass(Registry); initializeUnifyFunctionExitNodesLegacyPassPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0777a1385916..b887ea41676b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -92,6 +92,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include 
"llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -473,7 +474,7 @@ public: virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(); /// Widen a single call instruction within the innermost loop. - void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands, + void widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. @@ -1447,15 +1448,14 @@ public: // through scalar predication or masked load/store or masked gather/scatter. // \p VF is the vectorization factor that will be used to vectorize \p I. // Superset of instructions that return true for isScalarWithPredication. - bool isPredicatedInst(Instruction *I, ElementCount VF, - bool IsKnownUniform = false) { - // When we know the load is uniform and the original scalar loop was not - // predicated we don't need to mark it as a predicated instruction. Any - // vectorised blocks created when tail-folding are something artificial we - // have introduced and we know there is always at least one active lane. - // That's why we call Legal->blockNeedsPredication here because it doesn't - // query tail-folding. - if (IsKnownUniform && isa<LoadInst>(I) && + bool isPredicatedInst(Instruction *I, ElementCount VF) { + // When we know the load's address is loop invariant and the instruction + // in the original scalar loop was unconditionally executed then we + // don't need to mark it as a predicated instruction. Tail folding may + // introduce additional predication, but we're guaranteed to always have + // at least one active lane. We call Legal->blockNeedsPredication here + // because it doesn't query tail-folding. 
+ if (Legal->isUniformMemOp(*I) && isa<LoadInst>(I) && !Legal->blockNeedsPredication(I->getParent())) return false; if (!blockNeedsPredicationForAnyReason(I->getParent())) @@ -1657,10 +1657,6 @@ private: InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF) const; - /// Returns whether the instruction is a load or store and will be a emitted - /// as a vector operation. - bool isConsecutiveLoadOrStore(Instruction *I); - /// Returns true if an artificially high cost for emulated masked memrefs /// should be used. bool useEmulatedMaskMemRefHack(Instruction *I, ElementCount VF); @@ -1919,10 +1915,13 @@ public: auto DiffChecks = RtPtrChecking.getDiffChecks(); if (DiffChecks) { + Value *RuntimeVF = nullptr; MemRuntimeCheckCond = addDiffRuntimeChecks( MemCheckBlock->getTerminator(), L, *DiffChecks, MemCheckExp, - [VF](IRBuilderBase &B, unsigned Bits) { - return getRuntimeVF(B, B.getIntNTy(Bits), VF); + [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) { + if (!RuntimeVF) + RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF); + return RuntimeVF; }, IC); } else { @@ -2947,11 +2946,17 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { // If tail is to be folded, vector loop takes care of all iterations. Type *CountTy = Count->getType(); Value *CheckMinIters = Builder.getFalse(); - auto CreateStep = [&]() { + auto CreateStep = [&]() -> Value * { // Create step with max(MinProTripCount, UF * VF). 
- if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue()) - return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); - return createStepForVF(Builder, CountTy, VF, UF); + if (UF * VF.getKnownMinValue() >= MinProfitableTripCount.getKnownMinValue()) + return createStepForVF(Builder, CountTy, VF, UF); + + Value *MinProfTC = + createStepForVF(Builder, CountTy, MinProfitableTripCount, 1); + if (!VF.isScalable()) + return MinProfTC; + return Builder.CreateBinaryIntrinsic( + Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF)); }; if (!Cost->foldTailByMasking()) @@ -4168,46 +4173,26 @@ bool InnerLoopVectorizer::useOrderedReductions( return Cost->useOrderedReductions(RdxDesc); } -/// A helper function for checking whether an integer division-related -/// instruction may divide by zero (in which case it must be predicated if -/// executed conditionally in the scalar code). -/// TODO: It may be worthwhile to generalize and check isKnownNonZero(). -/// Non-zero divisors that are non compile-time constants will not be -/// converted into multiplication, so we will still end up scalarizing -/// the division, but can do so w/o predication. 
-static bool mayDivideByZero(Instruction &I) { - assert((I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SDiv || - I.getOpcode() == Instruction::URem || - I.getOpcode() == Instruction::SRem) && - "Unexpected instruction"); - Value *Divisor = I.getOperand(1); - auto *CInt = dyn_cast<ConstantInt>(Divisor); - return !CInt || CInt->isZero(); -} - -void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, +void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State) { - assert(!isa<DbgInfoIntrinsic>(I) && + assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFromInst(&I); - - Module *M = I.getParent()->getParent()->getParent(); - auto *CI = cast<CallInst>(&I); + State.setDebugLocFromInst(&CI); SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI->args()) + for (Value *ArgOperand : CI.args()) Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI); // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize = false; - InstructionCost CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize); - InstructionCost IntrinsicCost = ID ? Cost->getVectorIntrinsicCost(CI, VF) : 0; + InstructionCost CallCost = Cost->getVectorCallCost(&CI, VF, NeedToScalarize); + InstructionCost IntrinsicCost = + ID ? 
Cost->getVectorIntrinsicCost(&CI, VF) : 0; bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost; assert((UseVectorIntrinsic || !NeedToScalarize) && "Instruction should be scalarized elsewhere."); @@ -4215,7 +4200,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, "Either the intrinsic cost or vector call cost must be valid"); for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector<Type *, 2> TysForDecl = {CI->getType()}; + SmallVector<Type *, 2> TysForDecl = {CI.getType()}; SmallVector<Value *, 4> Args; for (auto &I : enumerate(ArgOperands.operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -4235,27 +4220,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, if (UseVectorIntrinsic) { // Use vector version of the intrinsic. if (VF.isVector()) - TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); + TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF); + Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); assert(VectorF && "Can't retrieve vector intrinsic."); } else { // Use vector version of the function call. 
- const VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/); + const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/); #ifndef NDEBUG - assert(VFDatabase(*CI).getVectorizedFunction(Shape) != nullptr && + assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && "Can't create vector function."); #endif - VectorF = VFDatabase(*CI).getVectorizedFunction(Shape); + VectorF = VFDatabase(CI).getVectorizedFunction(Shape); } SmallVector<OperandBundleDef, 1> OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); + CI.getOperandBundlesAsDefs(OpBundles); CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); if (isa<FPMathOperator>(V)) - V->copyFastMathFlags(CI); + V->copyFastMathFlags(&CI); State.set(Def, V, Part); - State.addMetadata(V, &I); + State.addMetadata(V, &CI); } } @@ -4470,7 +4456,9 @@ bool LoopVectorizationCostModel::isScalarWithPredication( case Instruction::SDiv: case Instruction::SRem: case Instruction::URem: - return mayDivideByZero(*I); + // TODO: We can use the loop-preheader as context point here and get + // context sensitive reasoning + return !isSafeToSpeculativelyExecute(I); } return false; } @@ -5406,7 +5394,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( } LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() && - ChosenFactor.Cost >= ScalarCost.Cost) dbgs() + !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs() << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << ChosenFactor.Width << ".\n"); @@ -6069,7 +6057,8 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I, // from moving "masked load/store" check from legality to cost model. // Masked Load/Gather emulation was previously never allowed. // Limited number of Masked Store/Scatter emulation was allowed. 
- assert(isPredicatedInst(I, VF) && "Expecting a scalar emulated instruction"); + assert((isPredicatedInst(I, VF) || Legal->isUniformMemOp(*I)) && + "Expecting a scalar emulated instruction"); return isa<LoadInst>(I) || (isa<StoreInst>(I) && NumPredStores > NumberOfStoresToPredicate); @@ -6779,19 +6768,29 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) { NumPredStores++; if (Legal->isUniformMemOp(I)) { - // TODO: Avoid replicating loads and stores instead of - // relying on instcombine to remove them. + // Lowering story for uniform memory ops is currently a bit complicated. + // Scalarization works for everything which isn't a store with scalable + // VF. Fixed len VFs just scalarize and then DCE later; scalarization + // knows how to handle uniform-per-part values (i.e. the first lane + // in each unrolled VF) and can thus handle scalable loads too. For + // scalable stores, we use a scatter if legal. If not, we have no way + // to lower (currently) and thus have to abort vectorization. + if (isa<StoreInst>(&I) && VF.isScalable()) { + if (isLegalGatherOrScatter(&I, VF)) + setWideningDecision(&I, VF, CM_GatherScatter, + getGatherScatterCost(&I, VF)); + else + // Error case, abort vectorization + setWideningDecision(&I, VF, CM_Scalarize, + InstructionCost::getInvalid()); + continue; + } // Load: Scalar load + broadcast // Store: Scalar store + isLoopInvariantStoreValue ? 0 : extract - InstructionCost Cost; - if (isa<StoreInst>(&I) && VF.isScalable() && - isLegalGatherOrScatter(&I, VF)) { - Cost = getGatherScatterCost(&I, VF); - setWideningDecision(&I, VF, CM_GatherScatter, Cost); - } else { - Cost = getUniformMemOpCost(&I, VF); - setWideningDecision(&I, VF, CM_Scalarize, Cost); - } + // TODO: Avoid replicating loads and stores instead of relying on + // instcombine to remove them. 
+ setWideningDecision(&I, VF, CM_Scalarize, + getUniformMemOpCost(&I, VF)); continue; } @@ -7146,13 +7145,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, InstWidening Decision = getWideningDecision(I, Width); assert(Decision != CM_Unknown && "CM decision should be taken at this point"); - if (Decision == CM_Scalarize) { - if (VF.isScalable() && isa<StoreInst>(I)) - // We can't scalarize a scalable vector store (even a uniform one - // currently), return an invalid cost so as to prevent vectorization. - return InstructionCost::getInvalid(); + if (getWideningCost(I, VF) == InstructionCost::getInvalid()) + return InstructionCost::getInvalid(); + if (Decision == CM_Scalarize) Width = ElementCount::getFixed(1); - } } VectorTy = ToVectorTy(getLoadStoreType(I), Width); return getMemoryInstructionCost(I, VF); @@ -7308,14 +7304,6 @@ Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced, } // end namespace llvm -bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { - // Check if the pointer operand of a load or store instruction is - // consecutive. - if (auto *Ptr = getLoadStorePointerOperand(Inst)) - return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr); - return false; -} - void LoopVectorizationCostModel::collectValuesToIgnore() { // Ignore ephemeral values. 
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); @@ -8370,7 +8358,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( Range); bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { return CM.isPredicatedInst(I, VF, IsUniform); }, + [&](ElementCount VF) { return CM.isPredicatedInst(I, VF); }, Range); // Even if the instruction is not marked as uniform, there are certain @@ -8406,8 +8394,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); - setRecipe(I, Recipe); - Plan->addVPValue(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into @@ -8426,6 +8412,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); + setRecipe(I, Recipe); + Plan->addVPValue(I, Recipe); VPBB->appendRecipe(Recipe); return VPBB; } @@ -8436,7 +8424,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( "predicated replication."); VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. 
- VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); + VPBlockBase *Region = createReplicateRegion(Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); @@ -8444,11 +8432,12 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return RegSucc; } -VPRegionBlock *VPRecipeBuilder::createReplicateRegion( - Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) { +VPRegionBlock * +VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe, + VPlanPtr &Plan) { + Instruction *Instr = PredRecipe->getUnderlyingInstr(); // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. - // Generate recipes to compute the block mask for this region. VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan); @@ -8461,9 +8450,13 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion( ? nullptr : new VPPredInstPHIRecipe(PredRecipe); if (PHIRecipe) { - Plan->removeVPValueFor(Instr); + setRecipe(Instr, PHIRecipe); Plan->addVPValue(Instr, PHIRecipe); + } else { + setRecipe(Instr, PredRecipe); + Plan->addVPValue(Instr, PredRecipe); } + auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true); @@ -9564,12 +9557,19 @@ void VPReplicateRecipe::execute(VPTransformState &State) { return; } - // Generate scalar instances for all VF lanes of all UF parts, unless the - // instruction is uniform inwhich case generate only the first lane for each - // of the UF parts. - unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); - assert((!State.VF.isScalable() || IsUniform) && - "Can't scalarize a scalable vector"); + if (IsUniform) { + // Uniform within VL means we need to generate lane 0 only for each + // unrolled copy. 
+ for (unsigned Part = 0; Part < State.UF; ++Part) + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, + VPIteration(Part, 0), IsPredicated, + State); + return; + } + + // Generate scalar instances for all VF lanes of all UF parts. + assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); + const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, @@ -9577,52 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) { State); } -void VPPredInstPHIRecipe::execute(VPTransformState &State) { - assert(State.Instance && "Predicated instruction PHI works per instance."); - Instruction *ScalarPredInst = - cast<Instruction>(State.get(getOperand(0), *State.Instance)); - BasicBlock *PredicatedBB = ScalarPredInst->getParent(); - BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); - assert(PredicatingBB && "Predicated block has no single predecessor."); - assert(isa<VPReplicateRecipe>(getOperand(0)) && - "operand must be VPReplicateRecipe"); - - // By current pack/unpack logic we need to generate only a single phi node: if - // a vector value for the predicated instruction exists at this point it means - // the instruction has vector users only, and a phi for the vector value is - // needed. In this case the recipe of the predicated instruction is marked to - // also do that packing, thereby "hoisting" the insert-element sequence. - // Otherwise, a phi node for the scalar value is needed. - unsigned Part = State.Instance->Part; - if (State.hasVectorValue(getOperand(0), Part)) { - Value *VectorValue = State.get(getOperand(0), Part); - InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); - PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); - VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. 
- VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - if (State.hasVectorValue(this, Part)) - State.reset(this, VPhi, Part); - else - State.set(this, VPhi, Part); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), VPhi, Part); - } else { - Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); - PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), - PredicatingBB); - Phi->addIncoming(ScalarPredInst, PredicatedBB); - if (State.hasScalarValue(this, *State.Instance)) - State.reset(this, Phi, *State.Instance); - else - State.set(this, Phi, *State.Instance); - // NOTE: Currently we need to update the value of the operand, so the next - // predicated iteration inserts its generated value in the correct vector. - State.reset(getOperand(0), Phi, *State.Instance); - } -} - void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; @@ -9793,8 +9747,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( }; // 4) if the TTI hook indicates this is profitable, request predication. 
- if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, - LVL.getLAI())) + if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL)) return CM_ScalarEpilogueNotNeededUsePredicate; return CM_ScalarEpilogueAllowed; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e136cd9aedac..cd044c78d900 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3744,7 +3744,7 @@ void BoUpSLP::reorderTopToBottom() { unsigned Opcode0 = TE->getOpcode(); unsigned Opcode1 = TE->getAltOpcode(); // The opcode mask selects between the two opcodes. - SmallBitVector OpcodeMask(TE->Scalars.size(), 0); + SmallBitVector OpcodeMask(TE->Scalars.size(), false); for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1) OpcodeMask.set(Lane); @@ -4814,6 +4814,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, return; } + // Don't go into catchswitch blocks, which can happen with PHIs. + // Such blocks can only have PHIs and the catchswitch. There is no + // place to insert a shuffle if we need to, so just avoid that issue. + if (isa<CatchSwitchInst>(BB->getTerminator())) { + LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); + return; + } + // Check that every instruction appears once in this bundle. 
if (!TryToFindDuplicates(S)) return; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index c7949c42c03e..07d3fa56020b 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -157,10 +157,8 @@ public: return Ingredient2Recipe[I]; } - /// Create a replicating region for instruction \p I that requires - /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, - VPReplicateRecipe *PredRecipe, + /// Create a replicating region for \p PredRecipe. + VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlanPtr &Plan); /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fdd901a4a70d..cb7507264667 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -892,6 +892,52 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) { ReplaceInstWithInst(CurrentTerminator, CondBr); } +void VPPredInstPHIRecipe::execute(VPTransformState &State) { + assert(State.Instance && "Predicated instruction PHI works per instance."); + Instruction *ScalarPredInst = + cast<Instruction>(State.get(getOperand(0), *State.Instance)); + BasicBlock *PredicatedBB = ScalarPredInst->getParent(); + BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); + assert(PredicatingBB && "Predicated block has no single predecessor."); + assert(isa<VPReplicateRecipe>(getOperand(0)) && + "operand must be VPReplicateRecipe"); + + // By current pack/unpack logic we need to generate only a single phi node: if + // a vector value for the 
predicated instruction exists at this point it means + // the instruction has vector users only, and a phi for the vector value is + // needed. In this case the recipe of the predicated instruction is marked to + // also do that packing, thereby "hoisting" the insert-element sequence. + // Otherwise, a phi node for the scalar value is needed. + unsigned Part = State.Instance->Part; + if (State.hasVectorValue(getOperand(0), Part)) { + Value *VectorValue = State.get(getOperand(0), Part); + InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); + PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); + VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. + VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. + if (State.hasVectorValue(this, Part)) + State.reset(this, VPhi, Part); + else + State.set(this, VPhi, Part); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. + State.reset(getOperand(0), VPhi, Part); + } else { + Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); + PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); + Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), + PredicatingBB); + Phi->addIncoming(ScalarPredInst, PredicatedBB); + if (State.hasScalarValue(this, *State.Instance)) + State.reset(this, Phi, *State.Instance); + else + State.set(this, Phi, *State.Instance); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. 
+ State.reset(getOperand(0), Phi, *State.Instance); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 3501de6ab38e..43e0a40fedb9 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -133,7 +133,9 @@ void VPlanVerifier::verifyHierarchicalCFG( verifyRegionRec(TopRegion); } -static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) { +static bool +verifyVPBasicBlock(const VPBasicBlock *VPBB, + DenseMap<const VPBlockBase *, unsigned> &BlockNumbering) { // Verify that phi-like recipes are at the beginning of the block, with no // other recipes in between. auto RecipeI = VPBB->begin(); @@ -165,15 +167,71 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) { RecipeI++; } + // Verify that defs in VPBB dominate all their uses. The current + // implementation is still incomplete. + DenseMap<const VPRecipeBase *, unsigned> RecipeNumbering; + unsigned Cnt = 0; + for (const VPRecipeBase &R : *VPBB) + RecipeNumbering[&R] = Cnt++; + + for (const VPRecipeBase &R : *VPBB) { + for (const VPValue *V : R.definedValues()) { + for (const VPUser *U : V->users()) { + auto *UI = dyn_cast<VPRecipeBase>(U); + if (!UI || isa<VPHeaderPHIRecipe>(UI)) + continue; + + // If the user is in the same block, check it comes after R in the + // block. + if (UI->getParent() == VPBB) { + if (RecipeNumbering[UI] < RecipeNumbering[&R]) { + errs() << "Use before def!\n"; + return false; + } + continue; + } + + // Skip blocks outside any region for now and blocks outside + // replicate-regions. 
+ auto *ParentR = VPBB->getParent(); + if (!ParentR || !ParentR->isReplicator()) + continue; + + // For replicators, verify that VPPRedInstPHIRecipe defs are only used + // in subsequent blocks. + if (isa<VPPredInstPHIRecipe>(&R)) { + auto I = BlockNumbering.find(UI->getParent()); + unsigned BlockNumber = I == BlockNumbering.end() ? std::numeric_limits<unsigned>::max() : I->second; + if (BlockNumber < BlockNumbering[ParentR]) { + errs() << "Use before def!\n"; + return false; + } + continue; + } + + // All non-VPPredInstPHIRecipe recipes in the block must be used in + // the replicate region only. + if (UI->getParent()->getParent() != ParentR) { + errs() << "Use before def!\n"; + return false; + } + } + } + } return true; } bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) { + DenseMap<const VPBlockBase *, unsigned> BlockNumbering; + unsigned Cnt = 0; auto Iter = depth_first( VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); - for (const VPBasicBlock *VPBB : - VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) { - if (!verifyVPBasicBlock(VPBB)) + for (const VPBlockBase *VPB : Iter) { + BlockNumbering[VPB] = Cnt++; + auto *VPBB = dyn_cast<VPBasicBlock>(VPB); + if (!VPBB) + continue; + if (!verifyVPBasicBlock(VPBB, BlockNumbering)) return false; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index d12624ffb824..a38936644bd3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1302,7 +1302,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) { for (ShuffleVectorInst *SV : Shuffles) { for (auto U : SV->users()) { ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U); - if (SSV && isa<UndefValue>(SSV->getOperand(1))) + if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT) 
Shuffles.push_back(SSV); } } diff --git a/contrib/llvm-project/llvm/tools/llc/llc.cpp b/contrib/llvm-project/llvm/tools/llc/llc.cpp index 853a0bd8eb54..f084ee2daa93 100644 --- a/contrib/llvm-project/llvm/tools/llc/llc.cpp +++ b/contrib/llvm-project/llvm/tools/llc/llc.cpp @@ -359,8 +359,6 @@ int main(int argc, char **argv) { initializeCodeGen(*Registry); initializeLoopStrengthReducePass(*Registry); initializeLowerIntrinsicsPass(*Registry); - initializeEntryExitInstrumenterPass(*Registry); - initializePostInlineEntryExitInstrumenterPass(*Registry); initializeUnreachableBlockElimLegacyPassPass(*Registry); initializeConstantHoistingLegacyPassPass(*Registry); initializeScalarOpts(*Registry); diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp index e964dc8256a5..1d4a8e9cd398 100644 --- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp @@ -18,10 +18,14 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Object/TapiFile.h" +#include "llvm/Object/Wasm.h" #include "llvm/Object/XCOFFObjectFile.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/CommandLine.h" @@ -55,6 +59,7 @@ #endif using namespace llvm; +using namespace llvm::object; // The name this program was invoked as. 
static StringRef ToolName; @@ -82,7 +87,7 @@ static void printArHelp(StringRef ToolName) { =gnu - gnu =darwin - darwin =bsd - bsd - =aix - aix (big archive) + =bigarchive - big archive (AIX OS) --plugin=<string> - ignored for compatibility -h --help - display this help and exit --output - the directory to extract archive members to @@ -91,6 +96,7 @@ static void printArHelp(StringRef ToolName) { =windows - windows --thin - create a thin archive --version - print the version and exit + -X{32|64|32_64|any} - object mode (only for AIX OS) @<file> - read options from <file> OPERATIONS: @@ -184,6 +190,10 @@ static void failIfError(Error E, Twine Context = "") { }); } +static void warn(Twine Message) { + WithColor::warning(errs(), ToolName) << Message << "\n"; +} + static SmallVector<const char *, 256> PositionalArgs; static bool MRI; @@ -209,6 +219,10 @@ enum ArchiveOperation { CreateSymTab ///< Create a symbol table in an existing archive }; +enum class BitModeTy { Bit32, Bit64, Bit32_64, Any, Unknown }; + +static BitModeTy BitMode = BitModeTy::Bit32; + // Modifiers to follow operation to vary behavior static bool AddAfter = false; ///< 'a' modifier static bool AddBefore = false; ///< 'b' modifier @@ -632,6 +646,71 @@ static bool shouldCreateArchive(ArchiveOperation Op) { llvm_unreachable("Missing entry in covered switch."); } +static bool is64BitSymbolicFile(SymbolicFile &Obj) { + if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj)) + return Triple(IRObj->getTargetTriple()).isArch64Bit(); + if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj)) + return false; + if (XCOFFObjectFile *XCOFFObj = dyn_cast<XCOFFObjectFile>(&Obj)) + return XCOFFObj->is64Bit(); + if (isa<WasmObjectFile>(Obj)) + return false; + if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj)) + return Tapi->is64Bit(); + if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj)) + return MachO->is64Bit(); + if (ELFObjectFileBase *ElfO = dyn_cast<ELFObjectFileBase>(&Obj)) + return ElfO->getBytesInAddress() == 8; 
+ + fail("unsupported file format"); +} + +static bool isValidInBitMode(Binary &Bin) { + if (BitMode == BitModeTy::Bit32_64 || BitMode == BitModeTy::Any) + return true; + + if (SymbolicFile *SymFile = dyn_cast<SymbolicFile>(&Bin)) { + bool Is64Bit = is64BitSymbolicFile(*SymFile); + if ((Is64Bit && (BitMode == BitModeTy::Bit32)) || + (!Is64Bit && (BitMode == BitModeTy::Bit64))) + return false; + } + // In AIX "ar", non-object files are always considered to have a valid bit + // mode. + return true; +} + +Expected<std::unique_ptr<Binary>> getAsBinary(const NewArchiveMember &NM, + LLVMContext *Context) { + auto BinaryOrErr = createBinary(NM.Buf->getMemBufferRef(), Context); + if (BinaryOrErr) + return std::move(*BinaryOrErr); + return BinaryOrErr.takeError(); +} + +Expected<std::unique_ptr<Binary>> getAsBinary(const Archive::Child &C, + LLVMContext *Context) { + return C.getAsBinary(Context); +} + +template <class A> static bool isValidInBitMode(const A &Member) { + if (object::Archive::getDefaultKindForHost() != object::Archive::K_AIXBIG) + return true; + LLVMContext Context; + Expected<std::unique_ptr<Binary>> BinOrErr = getAsBinary(Member, &Context); + // In AIX "ar", if there is a non-object file member, it is never ignored due + // to the bit mode setting. + if (!BinOrErr) { + consumeError(BinOrErr.takeError()); + return true; + } + return isValidInBitMode(*BinOrErr.get()); +} + +static void warnInvalidObjectForFileMode(Twine Name) { + warn("'" + Name + "' is not valid with the current object file mode"); +} + static void performReadOperation(ArchiveOperation Operation, object::Archive *OldArchive) { if (Operation == Extract && OldArchive->isThin()) @@ -646,6 +725,10 @@ static void performReadOperation(ArchiveOperation Operation, failIfError(NameOrErr.takeError()); StringRef Name = NameOrErr.get(); + // Check whether to ignore this object due to its bitness. 
+ if (!isValidInBitMode(C)) + continue; + if (Filter) { auto I = find_if(Members, [Name](StringRef Path) { return comparePaths(Name, Path); @@ -722,8 +805,7 @@ static void addChildMember(std::vector<NewArchiveMember> &Members, Members.push_back(std::move(*NMOrErr)); } -static void addMember(std::vector<NewArchiveMember> &Members, - StringRef FileName, bool FlattenArchive = false) { +static NewArchiveMember getArchiveMember(StringRef FileName) { Expected<NewArchiveMember> NMOrErr = NewArchiveMember::getFile(FileName, Deterministic); failIfError(NMOrErr.takeError(), FileName); @@ -743,9 +825,24 @@ static void addMember(std::vector<NewArchiveMember> &Members, PathOrErr ? *PathOrErr : sys::path::convert_to_slash(FileName)); } } + return std::move(*NMOrErr); +} + +static void addMember(std::vector<NewArchiveMember> &Members, + NewArchiveMember &NM) { + Members.push_back(std::move(NM)); +} + +static void addMember(std::vector<NewArchiveMember> &Members, + StringRef FileName, bool FlattenArchive = false) { + NewArchiveMember NM = getArchiveMember(FileName); + if (!isValidInBitMode(NM)) { + warnInvalidObjectForFileMode(FileName); + return; + } if (FlattenArchive && - identify_magic(NMOrErr->Buf->getBuffer()) == file_magic::archive) { + identify_magic(NM.Buf->getBuffer()) == file_magic::archive) { object::Archive &Lib = readLibrary(FileName); // When creating thin archives, only flatten if the member is also thin. 
if (!Thin || Lib.isThin()) { @@ -757,7 +854,7 @@ static void addMember(std::vector<NewArchiveMember> &Members, return; } } - Members.push_back(std::move(*NMOrErr)); + Members.push_back(std::move(NM)); } enum InsertAction { @@ -773,6 +870,9 @@ static InsertAction computeInsertAction(ArchiveOperation Operation, StringRef Name, std::vector<StringRef>::iterator &Pos, StringMap<int> &MemberCount) { + if (!isValidInBitMode(Member)) + return IA_AddOldMember; + if (Operation == QuickAppend || Members.empty()) return IA_AddOldMember; auto MI = find_if( @@ -834,7 +934,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, Expected<StringRef> NameOrErr = Child.getName(); failIfError(NameOrErr.takeError()); std::string Name = std::string(NameOrErr.get()); - if (comparePaths(Name, RelPos)) { + if (comparePaths(Name, RelPos) && isValidInBitMode(Child)) { assert(AddAfter || AddBefore); if (AddBefore) InsertPos = Pos; @@ -845,12 +945,25 @@ computeNewArchiveMembers(ArchiveOperation Operation, std::vector<StringRef>::iterator MemberI = Members.end(); InsertAction Action = computeInsertAction(Operation, Child, Name, MemberI, MemberCount); + + auto HandleNewMember = [](auto Member, auto &Members, auto &Child) { + NewArchiveMember NM = getArchiveMember(*Member); + if (isValidInBitMode(NM)) + addMember(Members, NM); + else { + // If a new member is not a valid object for the bit mode, add + // the old member back. 
+ warnInvalidObjectForFileMode(*Member); + addChildMember(Members, Child, /*FlattenArchive=*/Thin); + } + }; + switch (Action) { case IA_AddOldMember: addChildMember(Ret, Child, /*FlattenArchive=*/Thin); break; case IA_AddNewMember: - addMember(Ret, *MemberI); + HandleNewMember(MemberI, Ret, Child); break; case IA_Delete: break; @@ -858,7 +971,7 @@ computeNewArchiveMembers(ArchiveOperation Operation, addChildMember(Moved, Child, /*FlattenArchive=*/Thin); break; case IA_MoveNewMember: - addMember(Moved, *MemberI); + HandleNewMember(MemberI, Moved, Child); break; } // When processing elements with the count param, we need to preserve the @@ -1043,8 +1156,7 @@ static int performOperation(ArchiveOperation Operation, } else { if (!Create) { // Produce a warning if we should and we're creating the archive - WithColor::warning(errs(), ToolName) - << "creating " << ArchiveName << "\n"; + warn("creating " + ArchiveName); } } @@ -1155,6 +1267,15 @@ static bool handleGenericOption(StringRef arg) { return false; } +static BitModeTy getBitMode(const char *RawBitMode) { + return StringSwitch<BitModeTy>(RawBitMode) + .Case("32", BitModeTy::Bit32) + .Case("64", BitModeTy::Bit64) + .Case("32_64", BitModeTy::Bit32_64) + .Case("any", BitModeTy::Any) + .Default(BitModeTy::Unknown); +} + static const char *matchFlagWithArg(StringRef Expected, ArrayRef<const char *>::iterator &ArgIt, ArrayRef<const char *> Args) { @@ -1204,6 +1325,14 @@ static int ar_main(int argc, char **argv) { cl::ExpandResponseFiles(Saver, getRspQuoting(makeArrayRef(argv, argc)), Argv); + // Get BitMode from enviorment variable "OBJECT_MODE" for AIX OS, if + // specified. 
+ if (object::Archive::getDefaultKindForHost() == object::Archive::K_AIXBIG) { + BitMode = getBitMode(getenv("OBJECT_MODE")); + if (BitMode == BitModeTy::Unknown) + BitMode = BitModeTy::Bit32; + } + for (ArrayRef<const char *>::iterator ArgIt = Argv.begin(); ArgIt != Argv.end(); ++ArgIt) { const char *Match = nullptr; @@ -1258,6 +1387,19 @@ static int ar_main(int argc, char **argv) { matchFlagWithArg("rsp-quoting", ArgIt, Argv)) continue; + if (strncmp(*ArgIt, "-X", 2) == 0) { + if (object::Archive::getDefaultKindForHost() == + object::Archive::K_AIXBIG) { + Match = *(*ArgIt + 2) != '\0' ? *ArgIt + 2 : *(++ArgIt); + BitMode = getBitMode(Match); + if (BitMode == BitModeTy::Unknown) + fail(Twine("invalid bit mode: ") + Match); + continue; + } else { + fail(Twine(*ArgIt) + " option not supported on non AIX OS"); + } + } + Options += *ArgIt + 1; } diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp index 6932e9b5bd31..13b6c3002216 100644 --- a/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -436,8 +436,7 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() { CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArches, ViewOpts.CompilationDirectory); if (Error E = CoverageOrErr.takeError()) { - error("Failed to load coverage: " + toString(std::move(E)), - join(ObjectFilenames.begin(), ObjectFilenames.end(), ", ")); + error("Failed to load coverage: " + toString(std::move(E))); return nullptr; } auto Coverage = std::move(CoverageOrErr.get()); @@ -1053,7 +1052,7 @@ int CodeCoverageTool::doShow(int argc, const char **argv, sys::fs::file_status Status; if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { - error("Could not read profile data!", EC.message()); + error("Could not read profile data!" 
+ EC.message(), PGOFilename); return 1; } @@ -1170,6 +1169,12 @@ int CodeCoverageTool::doReport(int argc, const char **argv, return 1; } + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { + error("Could not read profile data!" + EC.message(), PGOFilename); + return 1; + } + auto Coverage = load(); if (!Coverage) return 1; @@ -1219,6 +1224,12 @@ int CodeCoverageTool::doExport(int argc, const char **argv, return 1; } + sys::fs::file_status Status; + if (std::error_code EC = sys::fs::status(PGOFilename, Status)) { + error("Could not read profile data!" + EC.message(), PGOFilename); + return 1; + } + auto Coverage = load(); if (!Coverage) { error("Could not load coverage information"); diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index f7d3052c8c4d..cc7f353330b1 100644 --- a/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -544,7 +544,7 @@ static bool collectObjectSources(ObjectFile &Obj, DWARFContext &DICtx, } // Dedup and order the sources. - llvm::sort(Sources.begin(), Sources.end()); + llvm::sort(Sources); Sources.erase(std::unique(Sources.begin(), Sources.end()), Sources.end()); for (StringRef Name : Sources) diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp new file mode 100644 index 000000000000..458a58c12ca7 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp @@ -0,0 +1,277 @@ +//=== DebugInfoLinker.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DebugInfoLinker.h" +#include "Error.h" +#include "llvm/DWARFLinker/DWARFLinker.h" +#include "llvm/DWARFLinker/DWARFStreamer.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/Object/ObjectFile.h" +#include <memory> +#include <vector> + +namespace llvm { +namespace dwarfutil { + +// ObjFileAddressMap allows to check whether specified DIE referencing +// dead addresses. It uses tombstone values to determine dead addresses. +// The concrete values of tombstone constants were discussed in +// https://reviews.llvm.org/D81784 and https://reviews.llvm.org/D84825. +// So we use following values as indicators of dead addresses: +// +// bfd: (LowPC == 0) or (LowPC == 1 and HighPC == 1 and DWARF v4 (or less)) +// or ([LowPC, HighPC] is not inside address ranges of .text sections). +// +// maxpc: (LowPC == -1) or (LowPC == -2 and DWARF v4 (or less)) +// That value is assumed to be compatible with +// http://www.dwarfstd.org/ShowIssue.php?issue=200609.1 +// +// exec: [LowPC, HighPC] is not inside address ranges of .text sections +// +// universal: maxpc and bfd +class ObjFileAddressMap : public AddressesMap { +public: + ObjFileAddressMap(DWARFContext &Context, const Options &Options, + object::ObjectFile &ObjFile) + : Opts(Options) { + // Remember addresses of existing text sections. + for (const object::SectionRef &Sect : ObjFile.sections()) { + if (!Sect.isText()) + continue; + const uint64_t Size = Sect.getSize(); + if (Size == 0) + continue; + const uint64_t StartAddr = Sect.getAddress(); + TextAddressRanges.insert({StartAddr, StartAddr + Size}); + } + + // Check CU address ranges for tombstone value. 
+ for (std::unique_ptr<DWARFUnit> &CU : Context.compile_units()) { + Expected<llvm::DWARFAddressRangesVector> ARanges = + CU->getUnitDIE().getAddressRanges(); + if (ARanges) { + for (auto &Range : *ARanges) { + if (!isDeadAddressRange(Range.LowPC, Range.HighPC, CU->getVersion(), + Options.Tombstone, CU->getAddressByteSize())) + DWARFAddressRanges.insert({Range.LowPC, Range.HighPC}, 0); + } + } + } + } + + // should be renamed into has valid address ranges + bool hasValidRelocs() override { return !DWARFAddressRanges.empty(); } + + bool isLiveSubprogram(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) override { + assert((DIE.getTag() == dwarf::DW_TAG_subprogram || + DIE.getTag() == dwarf::DW_TAG_label) && + "Wrong type of input die"); + + if (Optional<uint64_t> LowPC = + dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc))) { + if (!isDeadAddress(*LowPC, DIE.getDwarfUnit()->getVersion(), + Opts.Tombstone, + DIE.getDwarfUnit()->getAddressByteSize())) { + Info.AddrAdjust = 0; + Info.InDebugMap = true; + return true; + } + } + + return false; + } + + bool isLiveVariable(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) override { + assert((DIE.getTag() == dwarf::DW_TAG_variable || + DIE.getTag() == dwarf::DW_TAG_constant) && + "Wrong type of input die"); + + if (Expected<DWARFLocationExpressionsVector> Loc = + DIE.getLocations(dwarf::DW_AT_location)) { + DWARFUnit *U = DIE.getDwarfUnit(); + for (const auto &Entry : *Loc) { + DataExtractor Data(toStringRef(Entry.Expr), + U->getContext().isLittleEndian(), 0); + DWARFExpression Expression(Data, U->getAddressByteSize(), + U->getFormParams().Format); + bool HasLiveAddresses = + any_of(Expression, [&](const DWARFExpression::Operation &Op) { + // TODO: add handling of dwarf::DW_OP_addrx + return !Op.isError() && + (Op.getCode() == dwarf::DW_OP_addr && + !isDeadAddress(Op.getRawOperand(0), U->getVersion(), + Opts.Tombstone, + DIE.getDwarfUnit()->getAddressByteSize())); + }); + + if (HasLiveAddresses) { + Info.AddrAdjust = 0; + 
Info.InDebugMap = true; + return true; + } + } + } else { + // FIXME: missing DW_AT_location is OK here, but other errors should be + // reported to the user. + consumeError(Loc.takeError()); + } + + return false; + } + + bool applyValidRelocs(MutableArrayRef<char>, uint64_t, bool) override { + // no need to apply relocations to the linked binary. + return false; + } + + RangesTy &getValidAddressRanges() override { return DWARFAddressRanges; }; + + void clear() override { DWARFAddressRanges.clear(); } + + llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t, uint64_t) override { + // should not be called. + return object::createError("no relocations in linked binary"); + } + +protected: + // returns true if specified address range is inside address ranges + // of executable sections. + bool isInsideExecutableSectionsAddressRange(uint64_t LowPC, + Optional<uint64_t> HighPC) { + Optional<AddressRange> Range = + TextAddressRanges.getRangeThatContains(LowPC); + + if (HighPC) + return Range.has_value() && Range->end() >= *HighPC; + + return Range.has_value(); + } + + uint64_t isBFDDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version) { + if (LowPC == 0) + return true; + + if ((Version <= 4) && HighPC && (LowPC == 1 && *HighPC == 1)) + return true; + + return !isInsideExecutableSectionsAddressRange(LowPC, HighPC); + } + + uint64_t isMAXPCDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version, uint8_t AddressByteSize) { + if (Version <= 4 && HighPC) { + if (LowPC == (dwarf::computeTombstoneAddress(AddressByteSize) - 1)) + return true; + } else if (LowPC == dwarf::computeTombstoneAddress(AddressByteSize)) + return true; + + if (!isInsideExecutableSectionsAddressRange(LowPC, HighPC)) + warning("Address referencing invalid text section is not marked with " + "tombstone value"); + + return false; + } + + bool isDeadAddressRange(uint64_t LowPC, Optional<uint64_t> HighPC, + uint16_t Version, TombstoneKind Tombstone, + uint8_t 
AddressByteSize) { + switch (Tombstone) { + case TombstoneKind::BFD: + return isBFDDeadAddressRange(LowPC, HighPC, Version); + case TombstoneKind::MaxPC: + return isMAXPCDeadAddressRange(LowPC, HighPC, Version, AddressByteSize); + case TombstoneKind::Universal: + return isBFDDeadAddressRange(LowPC, HighPC, Version) || + isMAXPCDeadAddressRange(LowPC, HighPC, Version, AddressByteSize); + case TombstoneKind::Exec: + return !isInsideExecutableSectionsAddressRange(LowPC, HighPC); + } + + llvm_unreachable("Unknown tombstone value"); + } + + bool isDeadAddress(uint64_t LowPC, uint16_t Version, TombstoneKind Tombstone, + uint8_t AddressByteSize) { + return isDeadAddressRange(LowPC, None, Version, Tombstone, AddressByteSize); + } + +private: + RangesTy DWARFAddressRanges; + AddressRanges TextAddressRanges; + const Options &Opts; +}; + +bool linkDebugInfo(object::ObjectFile &File, const Options &Options, + raw_pwrite_stream &OutStream) { + + auto ReportWarn = [&](const Twine &Message, StringRef Context, + const DWARFDie *Die) { + warning(Message, Context); + + if (!Options.Verbose || !Die) + return; + + DIDumpOptions DumpOpts; + DumpOpts.ChildRecurseDepth = 0; + DumpOpts.Verbose = Options.Verbose; + + WithColor::note() << " in DIE:\n"; + Die->dump(errs(), /*Indent=*/6, DumpOpts); + }; + auto ReportErr = [&](const Twine &Message, StringRef Context, + const DWARFDie *) { + WithColor::error(errs(), Context) << Message << '\n'; + }; + + // Create output streamer. + DwarfStreamer OutStreamer(OutputFileType::Object, OutStream, nullptr, + ReportWarn, ReportWarn); + if (!OutStreamer.init(File.makeTriple(), "")) + return false; + + // Create DWARF linker. 
+ DWARFLinker DebugInfoLinker(&OutStreamer, DwarfLinkerClient::LLD); + + DebugInfoLinker.setEstimatedObjfilesAmount(1); + DebugInfoLinker.setAccelTableKind(DwarfLinkerAccelTableKind::None); + DebugInfoLinker.setErrorHandler(ReportErr); + DebugInfoLinker.setWarningHandler(ReportWarn); + DebugInfoLinker.setNumThreads(Options.NumThreads); + DebugInfoLinker.setNoODR(!Options.DoODRDeduplication); + DebugInfoLinker.setVerbosity(Options.Verbose); + DebugInfoLinker.setUpdate(!Options.DoGarbageCollection); + + std::vector<std::unique_ptr<DWARFFile>> ObjectsForLinking(1); + std::vector<std::unique_ptr<AddressesMap>> AddresssMapForLinking(1); + std::vector<std::string> EmptyWarnings; + + std::unique_ptr<DWARFContext> Context = DWARFContext::create(File); + + // Add object files to the DWARFLinker. + AddresssMapForLinking[0] = + std::make_unique<ObjFileAddressMap>(*Context, Options, File); + + ObjectsForLinking[0] = std::make_unique<DWARFFile>( + File.getFileName(), &*Context, AddresssMapForLinking[0].get(), + EmptyWarnings); + + for (size_t I = 0; I < ObjectsForLinking.size(); I++) + DebugInfoLinker.addObjectFile(*ObjectsForLinking[I]); + + // Link debug info. + DebugInfoLinker.link(); + OutStreamer.finish(); + return true; +} + +} // end of namespace dwarfutil +} // end of namespace llvm diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h new file mode 100644 index 000000000000..e95c83cb9609 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/DebugInfoLinker.h @@ -0,0 +1,31 @@ +//===- DebugInfoLinker.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H + +#include "Options.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" + +namespace llvm { +namespace dwarfutil { + +inline bool isDebugSection(StringRef SecName) { + return SecName.startswith(".debug") || SecName.startswith(".zdebug") || + SecName == ".gdb_index"; +} + +bool linkDebugInfo(object::ObjectFile &file, const Options &Options, + raw_pwrite_stream &OutStream); + +} // end of namespace dwarfutil +} // end of namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_DEBUGINFOLINKER_H diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Error.h b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Error.h new file mode 100644 index 000000000000..9ef288d4f657 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Error.h @@ -0,0 +1,44 @@ +//===- Error.h --------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace dwarfutil { + +inline void error(Error Err, StringRef Prefix = "") { + handleAllErrors(std::move(Err), [&](ErrorInfoBase &Info) { + WithColor::error(errs(), Prefix) << Info.message() << '\n'; + }); + std::exit(EXIT_FAILURE); +} + +inline void warning(const Twine &Message, StringRef Prefix = "") { + WithColor::warning(errs(), Prefix) << Message << '\n'; +} + +inline void verbose(const Twine &Message, bool Verbose) { + if (Verbose) + outs() << Message << '\n'; +} + +} // end of namespace dwarfutil +} // end of namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_ERROR_H diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.h b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.h new file mode 100644 index 000000000000..c993200ceb4b --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.h @@ -0,0 +1,46 @@ +//===- Options.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H +#define LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +namespace dwarfutil { + +/// The kind of tombstone value. +enum class TombstoneKind { + BFD, /// 0/[1:1]. Bfd default. + MaxPC, /// -1/-2. Assumed to match with + /// http://www.dwarfstd.org/ShowIssue.php?issue=200609.1. + Universal, /// both: BFD + MaxPC + Exec, /// match with address range of executable sections. +}; + +struct Options { + std::string InputFileName; + std::string OutputFileName; + bool DoGarbageCollection = false; + bool DoODRDeduplication = false; + bool BuildSeparateDebugFile = false; + TombstoneKind Tombstone = TombstoneKind::Universal; + bool Verbose = false; + int NumThreads = 0; + bool Verify = false; + + std::string getSeparateDebugFileName() const { + return OutputFileName + ".debug"; + } +}; + +} // namespace dwarfutil +} // namespace llvm + +#endif // LLVM_TOOLS_LLVM_DWARFUTIL_OPTIONS_H diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.td b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.td new file mode 100644 index 000000000000..4ab1b51d808d --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/Options.td @@ -0,0 +1,65 @@ +include "llvm/Option/OptParser.td" + +multiclass BB<string name, string help1, string help2> { + def NAME: Flag<["--"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--"], "no-" # name>, HelpText<help2>; +} + +def help : Flag<["--"], "help">, + HelpText<"Prints this help output">; + +def h : Flag<["-"], "h">, + Alias<help>, + HelpText<"Alias for --help">; + +defm odr_deduplication : BB<"odr-deduplication", + "Do ODR deduplication for debug types(default)", + "Don`t do ODR deduplication for debug types">; + +def odr : Flag<["--"], "odr">, + 
Alias<odr_deduplication>, + HelpText<"Alias for --odr-deduplication">; + +def no_odr : Flag<["--"], "no-odr">, + Alias<no_odr_deduplication>, + HelpText<"Alias for --no-odr-deduplication">; + +defm garbage_collection : BB<"garbage-collection", + "Do garbage collection for debug info(default)", + "Don`t do garbage collection for debug info">; + +defm separate_debug_file : BB<"separate-debug-file", + "Create two output files: file w/o debug tables and file with debug tables", + "Create single output file, containing debug tables(default)">; + +def tombstone: Separate<["--", "-"], "tombstone">, + MetaVarName<"[bfd,maxpc,exec,universal]">, + HelpText<"Tombstone value used as a marker of invalid address(default: universal)\n" + " =bfd - Zero for all addresses and [1,1] for DWARF v4 (or less) address ranges and exec\n" + " =maxpc - Minus 1 for all addresses and minus 2 for DWARF v4 (or less) address ranges\n" + " =exec - Match with address ranges of executable sections\n" + " =universal - Both: bfd and maxpc" + >; +def: Joined<["--", "-"], "tombstone=">, Alias<tombstone>; + +def threads: Separate<["--", "-"], "num-threads">, + MetaVarName<"<threads>">, + HelpText<"Number of available threads for multi-threaded execution. 
" + "Defaults to the number of cores on the current machine">; + +def: Separate<["-"], "j">, + Alias<threads>, + HelpText<"Alias for --num-threads">; + +def verbose : Flag<["--"], "verbose">, + HelpText<"Enable verbose logging">; + +def verify : Flag<["--"], "verify">, + HelpText<"Run the DWARF verifier on the resulting debug info">; + +def version : Flag<["--"], "version">, + HelpText<"Print the version and exit">; + +def V : Flag<["-"], "V">, + Alias<version>, + HelpText<"Alias for --version">; diff --git a/contrib/llvm-project/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp new file mode 100644 index 000000000000..e77c82e0fad9 --- /dev/null +++ b/contrib/llvm-project/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp @@ -0,0 +1,527 @@ +//=== llvm-dwarfutil.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DebugInfoLinker.h" +#include "Error.h" +#include "Options.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/MC/MCTargetOptionsCommandFlags.h" +#include "llvm/ObjCopy/CommonConfig.h" +#include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/ObjCopy/ObjCopy.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" + +using namespace llvm; +using namespace object; + +namespace { +enum ID { + OPT_INVALID = 0, // This is not an option ID. 
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "Options.inc" +#undef OPTION +}; + +class DwarfutilOptTable : public opt::OptTable { +public: + DwarfutilOptTable() : OptTable(InfoTable) {} +}; +} // namespace + +namespace llvm { +namespace dwarfutil { + +std::string ToolName; + +static mc::RegisterMCTargetOptionsFlags MOF; + +static Error validateAndSetOptions(opt::InputArgList &Args, Options &Options) { + auto UnknownArgs = Args.filtered(OPT_UNKNOWN); + if (!UnknownArgs.empty()) + return createStringError( + std::errc::invalid_argument, + formatv("unknown option: {0}", (*UnknownArgs.begin())->getSpelling()) + .str() + .c_str()); + + std::vector<std::string> InputFiles = Args.getAllArgValues(OPT_INPUT); + if (InputFiles.size() != 2) + return createStringError( + std::errc::invalid_argument, + formatv("exactly two positional arguments expected, {0} provided", + InputFiles.size()) + .str() + .c_str()); + + Options.InputFileName = InputFiles[0]; + Options.OutputFileName = InputFiles[1]; + + Options.BuildSeparateDebugFile = + Args.hasFlag(OPT_separate_debug_file, OPT_no_separate_debug_file, false); + Options.DoODRDeduplication = + Args.hasFlag(OPT_odr_deduplication, OPT_no_odr_deduplication, true); + Options.DoGarbageCollection = + Args.hasFlag(OPT_garbage_collection, OPT_no_garbage_collection, true); + Options.Verbose = Args.hasArg(OPT_verbose); + Options.Verify = Args.hasArg(OPT_verify); + + if (opt::Arg *NumThreads = 
Args.getLastArg(OPT_threads)) + Options.NumThreads = atoi(NumThreads->getValue()); + else + Options.NumThreads = 0; // Use all available hardware threads + + if (opt::Arg *Tombstone = Args.getLastArg(OPT_tombstone)) { + StringRef S = Tombstone->getValue(); + if (S == "bfd") + Options.Tombstone = TombstoneKind::BFD; + else if (S == "maxpc") + Options.Tombstone = TombstoneKind::MaxPC; + else if (S == "universal") + Options.Tombstone = TombstoneKind::Universal; + else if (S == "exec") + Options.Tombstone = TombstoneKind::Exec; + else + return createStringError( + std::errc::invalid_argument, + formatv("unknown tombstone value: '{0}'", S).str().c_str()); + } + + if (Options.Verbose) { + if (Options.NumThreads != 1 && Args.hasArg(OPT_threads)) + warning("--num-threads set to 1 because verbose mode is specified"); + + Options.NumThreads = 1; + } + + if (Options.DoODRDeduplication && Args.hasArg(OPT_odr_deduplication) && + !Options.DoGarbageCollection) + return createStringError( + std::errc::invalid_argument, + "cannot use --odr-deduplication without --garbage-collection"); + + if (Options.BuildSeparateDebugFile && Options.OutputFileName == "-") + return createStringError( + std::errc::invalid_argument, + "unable to write to stdout when --separate-debug-file specified"); + + return Error::success(); +} + +static Error setConfigToAddNewDebugSections(objcopy::ConfigManager &Config, + ObjectFile &ObjFile) { + // Add new debug sections. 
+ for (SectionRef Sec : ObjFile.sections()) { + Expected<StringRef> SecName = Sec.getName(); + if (!SecName) + return SecName.takeError(); + + if (isDebugSection(*SecName)) { + Expected<StringRef> SecData = Sec.getContents(); + if (!SecData) + return SecData.takeError(); + + Config.Common.AddSection.emplace_back(objcopy::NewSectionInfo( + *SecName, MemoryBuffer::getMemBuffer(*SecData, *SecName, false))); + } + } + + return Error::success(); +} + +static Error verifyOutput(const Options &Opts) { + if (Opts.OutputFileName == "-") { + warning("verification skipped because writing to stdout"); + return Error::success(); + } + + std::string FileName = Opts.BuildSeparateDebugFile + ? Opts.getSeparateDebugFileName() + : Opts.OutputFileName; + Expected<OwningBinary<Binary>> BinOrErr = createBinary(FileName); + if (!BinOrErr) + return createFileError(FileName, BinOrErr.takeError()); + + if (BinOrErr->getBinary()->isObject()) { + if (ObjectFile *Obj = static_cast<ObjectFile *>(BinOrErr->getBinary())) { + verbose("Verifying DWARF...", Opts.Verbose); + std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(*Obj); + DIDumpOptions DumpOpts; + if (!DICtx->verify(Opts.Verbose ? outs() : nulls(), + DumpOpts.noImplicitRecursion())) + return createFileError(FileName, + createError("output verification failed")); + + return Error::success(); + } + } + + // The file "FileName" was created by this utility in the previous steps + // (i.e. it is already known that it should pass the isObject check). + // If the createBinary() function does not return an error, the isObject + // check should also be successful. 
+ llvm_unreachable( + formatv("tool unexpectedly did not emit a supported object file: '{0}'", + FileName) + .str() + .c_str()); +} + +class raw_crc_ostream : public raw_ostream { +public: + explicit raw_crc_ostream(raw_ostream &O) : OS(O) { SetUnbuffered(); } + + void reserveExtraSpace(uint64_t ExtraSize) override { + OS.reserveExtraSpace(ExtraSize); + } + + uint32_t getCRC32() { return CRC32; } + +protected: + raw_ostream &OS; + uint32_t CRC32 = 0; + + /// See raw_ostream::write_impl. + void write_impl(const char *Ptr, size_t Size) override { + CRC32 = crc32( + CRC32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size)); + OS.write(Ptr, Size); + } + + /// Return the current position within the stream, not counting the bytes + /// currently in the buffer. + uint64_t current_pos() const override { return OS.tell(); } +}; + +static Expected<uint32_t> saveSeparateDebugInfo(const Options &Opts, + ObjectFile &InputFile) { + objcopy::ConfigManager Config; + std::string OutputFilename = Opts.getSeparateDebugFileName(); + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = OutputFilename; + Config.Common.OnlyKeepDebug = true; + uint32_t WrittenFileCRC32 = 0; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + raw_crc_ostream CRCBuffer(OutFile); + if (Error Err = objcopy::executeObjcopyOnBinary(Config, InputFile, + CRCBuffer)) + return Err; + + WrittenFileCRC32 = CRCBuffer.getCRC32(); + return Error::success(); + })) + return std::move(Err); + + return WrittenFileCRC32; +} + +static Error saveNonDebugInfo(const Options &Opts, ObjectFile &InputFile, + uint32_t GnuDebugLinkCRC32) { + objcopy::ConfigManager Config; + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + Config.Common.StripDebug = true; + std::string SeparateDebugFileName = Opts.getSeparateDebugFileName(); + Config.Common.AddGnuDebugLink = 
sys::path::filename(SeparateDebugFileName); + Config.Common.GnuDebugLinkCRC32 = GnuDebugLinkCRC32; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + if (Error Err = + objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile)) + return Err; + + return Error::success(); + })) + return Err; + + return Error::success(); +} + +static Error splitDebugIntoSeparateFile(const Options &Opts, + ObjectFile &InputFile) { + Expected<uint32_t> SeparateDebugFileCRC32OrErr = + saveSeparateDebugInfo(Opts, InputFile); + if (!SeparateDebugFileCRC32OrErr) + return SeparateDebugFileCRC32OrErr.takeError(); + + if (Error Err = + saveNonDebugInfo(Opts, InputFile, *SeparateDebugFileCRC32OrErr)) + return Err; + + return Error::success(); +} + +using DebugInfoBits = SmallString<10000>; + +static Error addSectionsFromLinkedData(objcopy::ConfigManager &Config, + ObjectFile &InputFile, + DebugInfoBits &LinkedDebugInfoBits) { + if (dyn_cast<ELFObjectFile<ELF32LE>>(&InputFile)) { + Expected<ELFObjectFile<ELF32LE>> MemFile = ELFObjectFile<ELF32LE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF64LE>>(&InputFile)) { + Expected<ELFObjectFile<ELF64LE>> MemFile = ELFObjectFile<ELF64LE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF32BE>>(&InputFile)) { + Expected<ELFObjectFile<ELF32BE>> MemFile = ELFObjectFile<ELF32BE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else if (dyn_cast<ELFObjectFile<ELF64BE>>(&InputFile)) { + Expected<ELFObjectFile<ELF64BE>> 
MemFile = ELFObjectFile<ELF64BE>::create( + MemoryBufferRef(LinkedDebugInfoBits, "")); + if (!MemFile) + return MemFile.takeError(); + + if (Error Err = setConfigToAddNewDebugSections(Config, *MemFile)) + return Err; + } else + return createStringError(std::errc::invalid_argument, + "unsupported file format"); + + return Error::success(); +} + +static Expected<uint32_t> +saveSeparateLinkedDebugInfo(const Options &Opts, ObjectFile &InputFile, + DebugInfoBits LinkedDebugInfoBits) { + objcopy::ConfigManager Config; + std::string OutputFilename = Opts.getSeparateDebugFileName(); + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = OutputFilename; + Config.Common.StripDebug = true; + Config.Common.OnlyKeepDebug = true; + uint32_t WrittenFileCRC32 = 0; + + if (Error Err = + addSectionsFromLinkedData(Config, InputFile, LinkedDebugInfoBits)) + return std::move(Err); + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + raw_crc_ostream CRCBuffer(OutFile); + + if (Error Err = objcopy::executeObjcopyOnBinary(Config, InputFile, + CRCBuffer)) + return Err; + + WrittenFileCRC32 = CRCBuffer.getCRC32(); + return Error::success(); + })) + return std::move(Err); + + return WrittenFileCRC32; +} + +static Error saveSingleLinkedDebugInfo(const Options &Opts, + ObjectFile &InputFile, + DebugInfoBits LinkedDebugInfoBits) { + objcopy::ConfigManager Config; + + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + Config.Common.StripDebug = true; + if (Error Err = + addSectionsFromLinkedData(Config, InputFile, LinkedDebugInfoBits)) + return Err; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + return objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile); + })) + return Err; + + return Error::success(); +} + +static Error saveLinkedDebugInfo(const Options &Opts, ObjectFile &InputFile, + 
DebugInfoBits LinkedDebugInfoBits) { + if (Opts.BuildSeparateDebugFile) { + Expected<uint32_t> SeparateDebugFileCRC32OrErr = + saveSeparateLinkedDebugInfo(Opts, InputFile, + std::move(LinkedDebugInfoBits)); + if (!SeparateDebugFileCRC32OrErr) + return SeparateDebugFileCRC32OrErr.takeError(); + + if (Error Err = + saveNonDebugInfo(Opts, InputFile, *SeparateDebugFileCRC32OrErr)) + return Err; + } else { + if (Error Err = saveSingleLinkedDebugInfo(Opts, InputFile, + std::move(LinkedDebugInfoBits))) + return Err; + } + + return Error::success(); +} + +static Error saveCopyOfFile(const Options &Opts, ObjectFile &InputFile) { + objcopy::ConfigManager Config; + + Config.Common.InputFilename = Opts.InputFileName; + Config.Common.OutputFilename = Opts.OutputFileName; + + if (Error Err = writeToOutput( + Config.Common.OutputFilename, [&](raw_ostream &OutFile) -> Error { + return objcopy::executeObjcopyOnBinary(Config, InputFile, OutFile); + })) + return Err; + + return Error::success(); +} + +static Error applyCLOptions(const struct Options &Opts, ObjectFile &InputFile) { + if (Opts.DoGarbageCollection) { + verbose("Do garbage collection for debug info ...", Opts.Verbose); + + DebugInfoBits LinkedDebugInfo; + raw_svector_ostream OutStream(LinkedDebugInfo); + + if (linkDebugInfo(InputFile, Opts, OutStream)) { + if (Error Err = + saveLinkedDebugInfo(Opts, InputFile, std::move(LinkedDebugInfo))) + return Err; + + return Error::success(); + } + + return createStringError(std::errc::invalid_argument, + "possible broken debug info"); + } else if (Opts.BuildSeparateDebugFile) { + if (Error Err = splitDebugIntoSeparateFile(Opts, InputFile)) + return Err; + } else { + if (Error Err = saveCopyOfFile(Opts, InputFile)) + return Err; + } + + return Error::success(); +} + +} // end of namespace dwarfutil +} // end of namespace llvm + +int main(int Argc, char const *Argv[]) { + using namespace dwarfutil; + + InitLLVM X(Argc, Argv); + ToolName = Argv[0]; + + // Parse arguments. 
+ DwarfutilOptTable T; + unsigned MAI; + unsigned MAC; + ArrayRef<const char *> ArgsArr = makeArrayRef(Argv + 1, Argc - 1); + opt::InputArgList Args = T.ParseArgs(ArgsArr, MAI, MAC); + + if (Args.hasArg(OPT_help) || Args.size() == 0) { + T.printHelp( + outs(), (ToolName + " [options] <input file> <output file>").c_str(), + "llvm-dwarfutil is a tool to copy and manipulate debug info", false); + return EXIT_SUCCESS; + } + + if (Args.hasArg(OPT_version)) { + cl::PrintVersionMessage(); + return EXIT_SUCCESS; + } + + Options Opts; + if (Error Err = validateAndSetOptions(Args, Opts)) + error(std::move(Err), dwarfutil::ToolName); + + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllTargetInfos(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = + MemoryBuffer::getFileOrSTDIN(Opts.InputFileName); + if (BuffOrErr.getError()) + error(createFileError(Opts.InputFileName, BuffOrErr.getError())); + + Expected<std::unique_ptr<Binary>> BinOrErr = + object::createBinary(**BuffOrErr); + if (!BinOrErr) + error(createFileError(Opts.InputFileName, BinOrErr.takeError())); + + Expected<FilePermissionsApplier> PermsApplierOrErr = + FilePermissionsApplier::create(Opts.InputFileName); + if (!PermsApplierOrErr) + error(createFileError(Opts.InputFileName, PermsApplierOrErr.takeError())); + + if (!(*BinOrErr)->isObject()) + error(createFileError(Opts.InputFileName, + createError("unsupported input file"))); + + if (Error Err = + applyCLOptions(Opts, *static_cast<ObjectFile *>((*BinOrErr).get()))) + error(createFileError(Opts.InputFileName, std::move(Err))); + + BinOrErr->reset(); + BuffOrErr->reset(); + + if (Error Err = PermsApplierOrErr->apply(Opts.OutputFileName)) + error(std::move(Err)); + + if (Opts.BuildSeparateDebugFile) + if (Error Err = PermsApplierOrErr->apply(Opts.getSeparateDebugFileName())) + error(std::move(Err)); + + if (Opts.Verify) { + if (Error Err = verifyOutput(Opts)) + error(std::move(Err)); 
+ } + + return EXIT_SUCCESS; +} diff --git a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp index 3e737b9fbaa0..aa380d3fe9bc 100644 --- a/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-mc/llvm-mc.cpp @@ -77,9 +77,7 @@ static cl::opt<DebugCompressionType> CompressDebugSections( cl::desc("Choose DWARF debug sections compression:"), cl::values(clEnumValN(DebugCompressionType::None, "none", "No compression"), clEnumValN(DebugCompressionType::Z, "zlib", - "Use zlib compression"), - clEnumValN(DebugCompressionType::GNU, "zlib-gnu", - "Use zlib-gnu compression (deprecated)")), + "Use zlib compression")), cl::cat(MCCategory)); static cl::opt<bool> diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp index 1245f9e18206..9e4fa7c0d9dd 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1131,7 +1131,21 @@ static void emitPostInstructionInfo(formatted_raw_ostream &FOS, FOS.flush(); } -static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, +static void createFakeELFSections(ObjectFile &Obj) { + assert(Obj.isELF()); + if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) + Elf32LEObj->createFakeSections(); + else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) + Elf64LEObj->createFakeSections(); + else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) + Elf32BEObj->createFakeSections(); + else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) + Elf64BEObj->createFakeSections(); + else + llvm_unreachable("Unsupported binary format"); +} + +static void disassembleObject(const Target *TheTarget, ObjectFile &Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, const MCInstrAnalysis *MIA, MCInstPrinter *IP, @@ -1198,6 
+1212,9 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, if (Obj.isWasm()) addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols); + if (Obj.isELF() && Obj.sections().empty()) + createFakeELFSections(Obj); + BumpPtrAllocator A; StringSaver Saver(A); addPltEntries(Obj, AllSymbols, Saver); @@ -1261,6 +1278,25 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, LLVM_DEBUG(LVP.dump()); + std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap; + auto ReadBBAddrMap = [&](Optional<unsigned> SectionIndex = None) { + AddrToBBAddrMap.clear(); + if (const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { + auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex); + if (!BBAddrMapsOrErr) + reportWarning(toString(BBAddrMapsOrErr.takeError()), + Obj.getFileName()); + for (auto &FunctionBBAddrMap : *BBAddrMapsOrErr) + AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, + std::move(FunctionBBAddrMap)); + } + }; + + // For non-relocatable objects, Read all LLVM_BB_ADDR_MAP sections into a + // single mapping, since they don't have any conflicts. + if (SymbolizeOperands && !Obj.isRelocatableObject()) + ReadBBAddrMap(); + for (const SectionRef &Section : ToolSectionFilter(Obj)) { if (FilterSections.empty() && !DisassembleAll && (!Section.isText() || Section.isVirtual())) @@ -1271,19 +1307,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, if (!SectSize) continue; - std::unordered_map<uint64_t, BBAddrMap> AddrToBBAddrMap; - if (SymbolizeOperands) { - if (auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { - // Read the BB-address-map corresponding to this section, if present. 
- auto SectionBBAddrMapsOrErr = Elf->readBBAddrMap(Section.getIndex()); - if (!SectionBBAddrMapsOrErr) - reportWarning(toString(SectionBBAddrMapsOrErr.takeError()), - Obj.getFileName()); - for (auto &FunctionBBAddrMap : *SectionBBAddrMapsOrErr) - AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr, - std::move(FunctionBBAddrMap)); - } - } + // For relocatable object files, read the LLVM_BB_ADDR_MAP section + // corresponding to this section, if present. + if (SymbolizeOperands && Obj.isRelocatableObject()) + ReadBBAddrMap(Section.getIndex()); // Get the list of all the symbols in this section. SectionSymbolsTy &Symbols = AllSymbols[Section]; @@ -1688,7 +1715,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile &Obj, reportWarning("failed to disassemble missing symbol " + Sym, FileName); } -static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { +static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) { const Target *TheTarget = getTarget(Obj); // Package up features to be passed to target/subtarget @@ -1890,7 +1917,7 @@ static size_t getMaxSectionNameWidth(const ObjectFile &Obj) { return MaxWidth; } -void objdump::printSectionHeaders(const ObjectFile &Obj) { +void objdump::printSectionHeaders(ObjectFile &Obj) { size_t NameWidth = getMaxSectionNameWidth(Obj); size_t AddressWidth = 2 * Obj.getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); @@ -1903,6 +1930,9 @@ void objdump::printSectionHeaders(const ObjectFile &Obj) { outs() << "Idx " << left_justify("Name", NameWidth) << " Size " << left_justify("VMA", AddressWidth) << " Type\n"; + if (Obj.isELF() && Obj.sections().empty()) + createFakeELFSections(Obj); + uint64_t Idx; for (const SectionRef &Section : ToolSectionFilter(Obj, &Idx)) { StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); diff --git a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h index 
dd9f58aa3308..c64c042d513e 100644 --- a/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/contrib/llvm-project/llvm/tools/llvm-objdump/llvm-objdump.h @@ -124,7 +124,7 @@ SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O, bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B); void printRelocations(const object::ObjectFile *O); void printDynamicRelocations(const object::ObjectFile *O); -void printSectionHeaders(const object::ObjectFile &O); +void printSectionHeaders(object::ObjectFile &O); void printSectionContents(const object::ObjectFile *O); void printSymbolTable(const object::ObjectFile &O, StringRef ArchiveName, StringRef ArchitectureName = StringRef(), diff --git a/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp index 9c6586483ef0..0c23d7c1435f 100644 --- a/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2471,9 +2471,10 @@ static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles, (ProfileTotalSample > 0) ? 
(Func.getTotalSamples() * 100.0) / ProfileTotalSample : 0; - PrintValues.emplace_back(HotFuncInfo( - Func.getContext().toString(), Func.getTotalSamples(), - TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples())); + PrintValues.emplace_back( + HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(), + TotalSamplePercent, FuncPair.second.second, + Func.getHeadSamplesEstimate())); } dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount, Profiles.size(), HotFuncSample, ProfileTotalSample, diff --git a/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp index ec9a4cde56b6..1c7484ba5496 100644 --- a/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-size/llvm-size.cpp @@ -868,8 +868,11 @@ int main(int argc, char **argv) { StringSaver Saver(A); SizeOptTable Tbl; ToolName = argv[0]; - opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, - [&](StringRef Msg) { error(Msg); }); + opt::InputArgList Args = + Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { + error(Msg); + exit(1); + }); if (Args.hasArg(OPT_help)) { Tbl.printHelp( outs(), diff --git a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index b782c7a1720a..7ec70e42f1c1 100644 --- a/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -365,20 +365,15 @@ static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args, return BuildID; } -// Symbolize the markup from stdin and write the result to stdout. +// Symbolize markup from stdin and write the result to stdout. 
static void filterMarkup(const opt::InputArgList &Args) { - MarkupParser Parser; MarkupFilter Filter(outs(), parseColorArg(Args)); - for (std::string InputString; std::getline(std::cin, InputString);) { + std::string InputString; + while (std::getline(std::cin, InputString)) { InputString += '\n'; - Parser.parseLine(InputString); - Filter.beginLine(InputString); - while (Optional<MarkupNode> Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.filter(InputString); } - Parser.flush(); - while (Optional<MarkupNode> Element = Parser.nextNode()) - Filter.filter(*Element); + Filter.finish(); } ExitOnError ExitOnErr; diff --git a/contrib/llvm-project/llvm/tools/opt/opt.cpp b/contrib/llvm-project/llvm/tools/opt/opt.cpp index 1160412e37af..a02997f82bb3 100644 --- a/contrib/llvm-project/llvm/tools/opt/opt.cpp +++ b/contrib/llvm-project/llvm/tools/opt/opt.cpp @@ -352,32 +352,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM, if (TM) TM->adjustPassManager(Builder); - switch (PGOKindFlag) { - case InstrGen: - Builder.EnablePGOInstrGen = true; - Builder.PGOInstrGen = ProfileFile; - break; - case InstrUse: - Builder.PGOInstrUse = ProfileFile; - break; - case SampleUse: - Builder.PGOSampleUse = ProfileFile; - break; - default: - break; - } - - switch (CSPGOKindFlag) { - case CSInstrGen: - Builder.EnablePGOCSInstrGen = true; - break; - case CSInstrUse: - Builder.EnablePGOCSInstrUse = true; - break; - default: - break; - } - Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); } @@ -545,8 +519,6 @@ int main(int argc, char **argv) { initializeIndirectBrExpandPassPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); - initializeEntryExitInstrumenterPass(Registry); - initializePostInlineEntryExitInstrumenterPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeExpandReductionsPass(Registry); initializeExpandVectorPredicationPass(Registry); diff 
--git a/contrib/llvm-project/llvm/utils/TableGen/CTagsEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/CTagsEmitter.cpp index ccb7f3300dde..fe62d6a9b67f 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/CTagsEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/CTagsEmitter.cpp @@ -27,18 +27,22 @@ namespace { class Tag { private: - const std::string *Id; - SMLoc Loc; + StringRef Id; + StringRef BufferIdentifier; + unsigned Line; public: - Tag(const std::string &Name, const SMLoc Location) - : Id(&Name), Loc(Location) {} - int operator<(const Tag &B) const { return *Id < *B.Id; } - void emit(raw_ostream &OS) const { + Tag(StringRef Name, const SMLoc Location) : Id(Name) { const MemoryBuffer *CurMB = - SrcMgr.getMemoryBuffer(SrcMgr.FindBufferContainingLoc(Loc)); - auto BufferName = CurMB->getBufferIdentifier(); - std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc); - OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n"; + SrcMgr.getMemoryBuffer(SrcMgr.FindBufferContainingLoc(Location)); + BufferIdentifier = CurMB->getBufferIdentifier(); + auto LineAndColumn = SrcMgr.getLineAndColumn(Location); + Line = LineAndColumn.first; + } + int operator<(const Tag &B) const { + return std::make_tuple(Id, BufferIdentifier, Line) < std::make_tuple(B.Id, B.BufferIdentifier, B.Line); + } + void emit(raw_ostream &OS) const { + OS << Id << "\t" << BufferIdentifier << "\t" << Line << "\n"; } }; @@ -67,8 +71,11 @@ void CTagsEmitter::run(raw_ostream &OS) { std::vector<Tag> Tags; // Collect tags. Tags.reserve(Classes.size() + Defs.size()); - for (const auto &C : Classes) + for (const auto &C : Classes) { Tags.push_back(Tag(C.first, locate(C.second.get()))); + for (SMLoc FwdLoc : C.second->getForwardDeclarationLocs()) + Tags.push_back(Tag(C.first, FwdLoc)); + } for (const auto &D : Defs) Tags.push_back(Tag(D.first, locate(D.second.get()))); // Emit tags. 
diff --git a/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp index 8f080cd250ab..e8ec90e9c078 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -149,7 +149,8 @@ void CallingConvEmitter::EmitAction(Record *Action, << "(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))\n" << IndentStr << " return false;\n"; DelegateToMap[CurrentAction].insert(CC->getName().str()); - } else if (Action->isSubClassOf("CCAssignToReg")) { + } else if (Action->isSubClassOf("CCAssignToReg") || + Action->isSubClassOf("CCAssignToRegAndStack")) { ListInit *RegList = Action->getValueAsListInit("RegList"); if (RegList->size() == 1) { std::string Name = getQualifiedName(RegList->getElementAsRecord(0)); @@ -178,6 +179,28 @@ void CallingConvEmitter::EmitAction(Record *Action, } O << IndentStr << " State.addLoc(CCValAssign::getReg(ValNo, ValVT, " << "Reg, LocVT, LocInfo));\n"; + if (Action->isSubClassOf("CCAssignToRegAndStack")) { + int Size = Action->getValueAsInt("Size"); + int Align = Action->getValueAsInt("Align"); + O << IndentStr << " (void)State.AllocateStack("; + if (Size) + O << Size << ", "; + else + O << "\n" + << IndentStr + << " State.getMachineFunction().getDataLayout()." + "getTypeAllocSize(EVT(LocVT).getTypeForEVT(State.getContext()))," + " "; + if (Align) + O << "Align(" << Align << ")"; + else + O << "\n" + << IndentStr + << " State.getMachineFunction().getDataLayout()." 
+ "getABITypeAlign(EVT(LocVT).getTypeForEVT(State.getContext()" + "))"; + O << ");\n"; + } O << IndentStr << " return false;\n"; O << IndentStr << "}\n"; } else if (Action->isSubClassOf("CCAssignToRegWithShadow")) { diff --git a/contrib/llvm-project/llvm/utils/TableGen/DXILEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DXILEmitter.cpp index fd58e798b445..b9c563c62bbe 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/DXILEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/DXILEmitter.cpp @@ -122,15 +122,14 @@ static std::string buildCategoryStr(StringSet<> &Cetegorys) { static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps, raw_ostream &OS) { // Sort by Category + OpName. - std::sort(DXILOps.begin(), DXILOps.end(), - [](DXILOperationData &A, DXILOperationData &B) { - // Group by Category first. - if (A.Category == B.Category) - // Inside same Category, order by OpName. - return A.DXILOp < B.DXILOp; - else - return A.Category < B.Category; - }); + llvm::sort(DXILOps, [](DXILOperationData &A, DXILOperationData &B) { + // Group by Category first. + if (A.Category == B.Category) + // Inside same Category, order by OpName. + return A.DXILOp < B.DXILOp; + else + return A.Category < B.Category; + }); OS << "// Enumeration for operations specified by DXIL\n"; OS << "enum class OpCode : unsigned {\n"; @@ -160,20 +159,19 @@ static void emitDXILEnums(std::vector<DXILOperationData> &DXILOps, std::make_pair(It.getKey().str(), buildCategoryStr(It.second))); } // Sort by Category + ClassName. - std::sort(ClassVec.begin(), ClassVec.end(), - [](std::pair<std::string, std::string> &A, - std::pair<std::string, std::string> &B) { - StringRef ClassA = A.first; - StringRef CategoryA = A.second; - StringRef ClassB = B.first; - StringRef CategoryB = B.second; - // Group by Category first. - if (CategoryA == CategoryB) - // Inside same Category, order by ClassName. 
- return ClassA < ClassB; - else - return CategoryA < CategoryB; - }); + llvm::sort(ClassVec, [](std::pair<std::string, std::string> &A, + std::pair<std::string, std::string> &B) { + StringRef ClassA = A.first; + StringRef CategoryA = A.second; + StringRef ClassB = B.first; + StringRef CategoryB = B.second; + // Group by Category first. + if (CategoryA == CategoryB) + // Inside same Category, order by ClassName. + return ClassA < ClassB; + else + return CategoryA < CategoryB; + }); OS << "// Groups for DXIL operations with equivalent function templates\n"; OS << "enum class OpCodeClass : unsigned {\n"; @@ -266,10 +264,9 @@ static std::string getDXILOpClassName(StringRef DXILOpClass) { static void emitDXILOperationTable(std::vector<DXILOperationData> &DXILOps, raw_ostream &OS) { // Sort by DXILOpID. - std::sort(DXILOps.begin(), DXILOps.end(), - [](DXILOperationData &A, DXILOperationData &B) { - return A.DXILOpID < B.DXILOpID; - }); + llvm::sort(DXILOps, [](DXILOperationData &A, DXILOperationData &B) { + return A.DXILOpID < B.DXILOpID; + }); // Collect Names. SequenceToOffsetTable<std::string> OpClassStrings; diff --git a/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp index f3751591f3d9..f32fbe3e25cd 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -668,6 +669,85 @@ void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang, << " Parser clause\");\n"; } +bool compareClauseName(Record *R1, Record *R2) { + Clause C1{R1}; + Clause C2{R2}; + return (C1.getName() > C2.getName()); +} + +// Generate the parser for the clauses. 
+void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, + raw_ostream &OS) { + std::vector<Record *> Clauses = DirLang.getClauses(); + // Sort clauses in reverse alphabetical order so with clauses with same + // beginning, the longer option is tried before. + llvm::sort(Clauses, compareClauseName); + IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS); + OS << "\n"; + unsigned index = 0; + unsigned lastClauseIndex = DirLang.getClauses().size() - 1; + OS << "TYPE_PARSER(\n"; + for (const auto &C : Clauses) { + Clause Clause{C}; + if (Clause.getAliases().empty()) { + OS << " \"" << Clause.getName() << "\""; + } else { + OS << " (" + << "\"" << Clause.getName() << "\"_tok"; + for (StringRef alias : Clause.getAliases()) { + OS << " || \"" << alias << "\"_tok"; + } + OS << ")"; + } + + OS << " >> construct<" << DirLang.getFlangClauseBaseClass() + << ">(construct<" << DirLang.getFlangClauseBaseClass() + << "::" << Clause.getFormattedParserClassName() << ">("; + if (Clause.getFlangClass().empty()) { + OS << "))"; + if (index != lastClauseIndex) + OS << " ||"; + OS << "\n"; + ++index; + continue; + } + + if (Clause.isValueOptional()) + OS << "maybe("; + OS << "parenthesized("; + + if (!Clause.getPrefix().empty()) + OS << "\"" << Clause.getPrefix() << ":\" >> "; + + // The common Flang parser are used directly. Their name is identical to + // the Flang class with first letter as lowercase. If the Flang class is + // not a common class, we assume there is a specific Parser<>{} with the + // Flang class name provided. 
+ llvm::SmallString<128> Scratch; + StringRef Parser = + llvm::StringSwitch<StringRef>(Clause.getFlangClass()) + .Case("Name", "name") + .Case("ScalarIntConstantExpr", "scalarIntConstantExpr") + .Case("ScalarIntExpr", "scalarIntExpr") + .Case("ScalarLogicalExpr", "scalarLogicalExpr") + .Default(("Parser<" + Clause.getFlangClass() + ">{}") + .toStringRef(Scratch)); + OS << Parser; + if (!Clause.getPrefix().empty() && Clause.isPrefixOptional()) + OS << " || " << Parser; + OS << ")"; // close parenthesized(. + + if (Clause.isValueOptional()) // close maybe(. + OS << ")"; + OS << "))"; + if (index != lastClauseIndex) + OS << " ||"; + OS << "\n"; + ++index; + } + OS << ")\n"; +} + // Generate the implementation section for the enumeration in the directive // language void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, @@ -688,6 +768,8 @@ void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, GenerateFlangClauseCheckPrototypes(DirLang, OS); GenerateFlangClauseParserKindMap(DirLang, OS); + + GenerateFlangClausesParser(DirLang, OS); } void GenerateClauseClassMacro(const DirectiveLanguage &DirLang, diff --git a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp index ea849807de03..327c53e93a41 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -15,6 +15,7 @@ #include "CodeGenIntrinsics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -650,8 +651,9 @@ void SearchableTableEmitter::collectTableEntries( SearchIndex Idx; std::copy(Table.Fields.begin(), Table.Fields.end(), std::back_inserter(Idx.Fields)); - std::sort(Table.Entries.begin(), Table.Entries.end(), - [&](Record *LHS, Record *RHS) { return compareBy(LHS, RHS, 
Idx); }); + llvm::sort(Table.Entries, [&](Record *LHS, Record *RHS) { + return compareBy(LHS, RHS, Idx); + }); } void SearchableTableEmitter::run(raw_ostream &OS) { |