author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-02 21:02:54 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-02 21:02:54 +0000
commit | f65dcba83ce5035ab88a85fe17628b447eb56e1b (patch)
tree | 35f37bb72b3cfc6060193e66c76ee7c9478969b0
parent | 846a2208a8ab099f595fe7e8b2e6d54a7b5e67fb (diff)
Vendor import of llvm-project main llvmorg-14-init-11187-g222442ec2d71 (vendor/llvm-project/llvmorg-14-init-11187-g222442ec2d71)
743 files changed, 23440 insertions, 13594 deletions
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 85a3a8ab6970..2eacf1105c18 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1840,7 +1840,8 @@ enum class MultiVersionKind { None, Target, CPUSpecific, - CPUDispatch + CPUDispatch, + TargetClones }; /// Represents a function declaration or definition. @@ -2459,6 +2460,10 @@ public: /// the target functionality. bool isTargetMultiVersion() const; + /// True if this function is a multiversioned dispatch function as a part of + /// the target-clones functionality. + bool isTargetClonesMultiVersion() const; + /// \brief Get the associated-constraints of this function declaration. /// Currently, this will either be a vector of size 1 containing the /// trailing-requires-clause or an empty vector. diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 18468c8ca1c4..2a0a19597391 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -352,7 +352,7 @@ protected: DeclContext *Parent, std::size_t Extra = 0); private: - bool AccessDeclContextSanity() const; + bool AccessDeclContextCheck() const; /// Get the module ownership kind to use for a local lexical child of \p DC, /// which may be either a local or (rarely) an imported declaration. @@ -472,11 +472,11 @@ public: void setAccess(AccessSpecifier AS) { Access = AS; - assert(AccessDeclContextSanity()); + assert(AccessDeclContextCheck()); } AccessSpecifier getAccess() const { - assert(AccessDeclContextSanity()); + assert(AccessDeclContextCheck()); return AccessSpecifier(Access); } diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 991abef73363..2c63406fba18 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6305,8 +6305,10 @@ public: bool isCmpXChg() const { return getOp() == AO__c11_atomic_compare_exchange_strong || getOp() == AO__c11_atomic_compare_exchange_weak || + getOp() == AO__hip_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_strong || getOp() == AO__opencl_atomic_compare_exchange_weak || + getOp() == AO__hip_atomic_compare_exchange_weak || getOp() == AO__atomic_compare_exchange || getOp() == AO__atomic_compare_exchange_n; } @@ -6341,6 +6343,8 @@ public: auto Kind = (Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max) ? AtomicScopeModelKind::OpenCL + : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max) + ? 
AtomicScopeModelKind::HIP : AtomicScopeModelKind::None; return AtomicScopeModel::create(Kind); } diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h index 8cb56fb4ae90..88abba28c991 100644 --- a/clang/include/clang/AST/GlobalDecl.h +++ b/clang/include/clang/AST/GlobalDecl.h @@ -18,6 +18,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/DeclTemplate.h" #include "clang/Basic/ABI.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMapInfo.h" @@ -129,8 +130,12 @@ public: } KernelReferenceKind getKernelReferenceKind() const { - assert(isa<FunctionDecl>(getDecl()) && - cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() && + assert(((isa<FunctionDecl>(getDecl()) && + cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) || + (isa<FunctionTemplateDecl>(getDecl()) && + cast<FunctionTemplateDecl>(getDecl()) + ->getTemplatedDecl() + ->hasAttr<CUDAGlobalAttr>())) && "Decl is not a GPU kernel!"); return static_cast<KernelReferenceKind>(Value.getInt()); } diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index d6e5b215462b..5221d05477d0 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -4227,8 +4227,8 @@ AST_MATCHER(VarDecl, isInitCapture) { return Node.isInitCapture(); } /// lambdaExpr(forEachLambdaCapture( /// lambdaCapture(capturesVar(varDecl(hasType(isInteger())))))) /// will trigger two matches, binding for 'x' and 'y' respectively. -AST_MATCHER_P(LambdaExpr, forEachLambdaCapture, LambdaCaptureMatcher, - InnerMatcher) { +AST_MATCHER_P(LambdaExpr, forEachLambdaCapture, + internal::Matcher<LambdaCapture>, InnerMatcher) { BoundNodesTreeBuilder Result; bool Matched = false; for (const auto &Capture : Node.captures()) { @@ -4655,7 +4655,8 @@ extern const internal::VariadicAllOfMatcher<LambdaCapture> lambdaCapture; /// lambdaExpr(hasAnyCapture(lambdaCapture())) and /// lambdaExpr(hasAnyCapture(lambdaCapture(refersToVarDecl(hasName("t"))))) /// both match `[=](){ return t; }`. -AST_MATCHER_P(LambdaExpr, hasAnyCapture, LambdaCaptureMatcher, InnerMatcher) { +AST_MATCHER_P(LambdaExpr, hasAnyCapture, internal::Matcher<LambdaCapture>, + InnerMatcher) { for (const LambdaCapture &Capture : Node.captures()) { clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder); if (InnerMatcher.matches(Capture, Finder, &Result)) { diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h index 3b9b22e87f35..b8e453fcc235 100644 --- a/clang/include/clang/Analysis/CFG.h +++ b/clang/include/clang/Analysis/CFG.h @@ -515,7 +515,7 @@ public: /// of the most derived class while we're in the base class. VirtualBaseBranch, - /// Number of different kinds, for validity checks. We subtract 1 so that + /// Number of different kinds, for assertions. We subtract 1 so that /// to keep receiving compiler warnings when we don't cover all enum values /// in a switch. NumKindsMinusOne = VirtualBaseBranch diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h new file mode 100644 index 000000000000..a5d4a5d6ba40 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h @@ -0,0 +1,134 @@ +//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines base types and functions for building dataflow analyses +// that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H + +#include <iterator> +#include <utility> +#include <vector> + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/ADT/Any.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" + +namespace clang { +namespace dataflow { + +/// Base class template for dataflow analyses built on a single lattice type. +/// +/// Requirements: +/// +/// `Derived` must be derived from a specialization of this class template and +/// must provide the following public members: +/// * `LatticeT initialElement()` - returns a lattice element that models the +/// initial state of a basic block; +/// * `LatticeT transfer(const Stmt *, const LatticeT &, Environment &)` - +/// applies the analysis transfer function for a given statement and lattice +/// element. +/// +/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must +/// provide the following public members: +/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the +/// argument by computing their least upper bound, modifies the object if +/// necessary, and returns an effect indicating whether any changes were +/// made to it; +/// * `bool operator==(const LatticeT &) const` - returns true if and only if +/// the object is equal to the argument. +template <typename Derived, typename LatticeT> +class DataflowAnalysis : public TypeErasedDataflowAnalysis { +public: + /// Bounded join-semilattice that is used in the analysis. + using Lattice = LatticeT; + + explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} + + ASTContext &getASTContext() final { return Context; } + + TypeErasedLattice typeErasedInitialElement() final { + return {static_cast<Derived *>(this)->initialElement()}; + } + + LatticeJoinEffect joinTypeErased(TypeErasedLattice &E1, + const TypeErasedLattice &E2) final { + Lattice &L1 = llvm::any_cast<Lattice &>(E1.Value); + const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); + return L1.join(L2); + } + + bool isEqualTypeErased(const TypeErasedLattice &E1, + const TypeErasedLattice &E2) final { + const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); + const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); + return L1 == L2; + } + + TypeErasedLattice transferTypeErased(const Stmt *Stmt, + const TypeErasedLattice &E, + Environment &Env) final { + const Lattice &L = llvm::any_cast<const Lattice &>(E.Value); + return {static_cast<Derived *>(this)->transfer(Stmt, L, Env)}; + } + +private: + ASTContext &Context; +}; + +// Model of the program at a given program point. +template <typename LatticeT> struct DataflowAnalysisState { + // Model of a program property. + LatticeT Lattice; + + // Model of the state of the program (store and heap). 
+ Environment Env; +}; + +/// Performs dataflow analysis and returns a mapping from basic block IDs to +/// dataflow analysis states that model the respective basic blocks. Indices +/// of the returned vector correspond to basic block IDs. +/// +/// Requirements: +/// +/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to +/// ensure that all sub-expressions in a basic block are evaluated. +template <typename AnalysisT> +std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> +runDataflowAnalysis(const CFG &Cfg, AnalysisT &Analysis, + const Environment &InitEnv) { + auto TypeErasedBlockStates = + runTypeErasedDataflowAnalysis(Cfg, Analysis, InitEnv); + std::vector< + llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> + BlockStates; + BlockStates.reserve(TypeErasedBlockStates.size()); + llvm::transform(std::move(TypeErasedBlockStates), + std::back_inserter(BlockStates), [](auto &OptState) { + return std::move(OptState).map([](auto &&State) { + return DataflowAnalysisState<typename AnalysisT::Lattice>{ + llvm::any_cast<typename AnalysisT::Lattice>( + std::move(State.Lattice.Value)), + std::move(State.Env)}; + }); + }); + return BlockStates; +} + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h new file mode 100644 index 000000000000..69a5c2e47b66 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -0,0 +1,27 @@ +//===-- DataflowEnvironment.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an Environment class that is used by dataflow analyses +// that run over Control-Flow Graphs (CFGs) to keep track of the state of the +// program at given program points. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H + +namespace clang { +namespace dataflow { + +/// Holds the state of the program (store and heap) at a given program point. +class Environment {}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h new file mode 100644 index 000000000000..37d2e0200410 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h @@ -0,0 +1,29 @@ +//===- DataflowLattice.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines base types for building lattices to be used in dataflow +// analyses that run over Control-Flow Graphs (CFGs). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H + +namespace clang { +namespace dataflow { + +/// Effect indicating whether a lattice join operation resulted in a new value. +enum class LatticeJoinEffect { + Unchanged, + Changed, +}; + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h new file mode 100644 index 000000000000..9448b911f471 --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h @@ -0,0 +1,95 @@ +//===- TypeErasedDataflowAnalysis.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines type-erased base types and functions for building dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H + +#include <vector> + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Stmt.h" +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/ADT/Any.h" +#include "llvm/ADT/Optional.h" + +namespace clang { +namespace dataflow { + +/// Type-erased lattice element container. +/// +/// Requirements: +/// +/// The type of the object stored in the container must be a bounded +/// join-semilattice. +struct TypeErasedLattice { + llvm::Any Value; +}; + +/// Type-erased base class for dataflow analyses built on a single lattice type. +class TypeErasedDataflowAnalysis { +public: + virtual ~TypeErasedDataflowAnalysis() {} + + /// Returns the `ASTContext` that is used by the analysis. + virtual ASTContext &getASTContext() = 0; + + /// Returns a type-erased lattice element that models the initial state of a + /// basic block. + virtual TypeErasedLattice typeErasedInitialElement() = 0; + + /// Joins two type-erased lattice elements by computing their least upper + /// bound. Places the join result in the left element and returns an effect + /// indicating whether any changes were made to it. + virtual LatticeJoinEffect joinTypeErased(TypeErasedLattice &, + const TypeErasedLattice &) = 0; + + /// Returns true if and only if the two given type-erased lattice elements are + /// equal. + virtual bool isEqualTypeErased(const TypeErasedLattice &, + const TypeErasedLattice &) = 0; + + /// Applies the analysis transfer function for a given statement and + /// type-erased lattice element. + virtual TypeErasedLattice transferTypeErased(const Stmt *, + const TypeErasedLattice &, + Environment &) = 0; +}; + +/// Type-erased model of the program at a given program point. +struct TypeErasedDataflowAnalysisState { + /// Type-erased model of a program property. + TypeErasedLattice Lattice; + + /// Model of the state of the program (store and heap). 
+ Environment Env; +}; + +/// Performs dataflow analysis and returns a mapping from basic block IDs to +/// dataflow analysis states that model the respective basic blocks. Indices +/// of the returned vector correspond to basic block IDs. +/// +/// Requirements: +/// +/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to +/// ensure that all sub-expressions in a basic block are evaluated. +std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> +runTypeErasedDataflowAnalysis(const CFG &Cfg, + TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv); + +} // namespace dataflow +} // namespace clang + +#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 39588d94cf09..fab3f3edfb83 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2677,6 +2677,40 @@ def Target : InheritableAttr { }]; } +def TargetClones : InheritableAttr { + let Spellings = [GCC<"target_clones">]; + let Args = [VariadicStringArgument<"featuresStrs">]; + let Documentation = [TargetClonesDocs]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let AdditionalMembers = [{ + StringRef getFeatureStr(unsigned Index) const { + return *(featuresStrs_begin() + Index); + } + // 'default' is always moved to the end, so it isn't considered + // when mangling the index. + unsigned getMangledIndex(unsigned Index) const { + if (getFeatureStr(Index) == "default") + return std::count_if(featuresStrs_begin(), featuresStrs_end(), + [](StringRef S) { return S != "default"; }); + + return std::count_if(featuresStrs_begin(), featuresStrs_begin() + Index, + [](StringRef S) { return S != "default"; }); + } + + // True if this is the first of this version to appear in the config string. + // This is used to make sure we don't try to emit this function multiple + // times. + bool isFirstOfVersion(unsigned Index) const { + StringRef FeatureStr(getFeatureStr(Index)); + return 0 == std::count_if( + featuresStrs_begin(), featuresStrs_begin() + Index, + [FeatureStr](StringRef S) { return S == FeatureStr; }); + } + }]; +} + +def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>; + def MinVectorWidth : InheritableAttr { let Spellings = [Clang<"min_vector_width">]; let Args = [UnsignedArgument<"VectorWidth">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index e7afb3699eb1..10cce4c2d689 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2233,6 +2233,40 @@ Additionally, a function may not become multiversioned after its first use. }]; } +def TargetClonesDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +Clang supports the ``target_clones("OPTIONS")`` attribute. This attribute may be +attached to a function declaration and causes function multiversioning, where +multiple versions of the function will be emitted with different code +generation options. Additionally, these versions will be resolved at runtime +based on the priority of their attribute options. All ``target_clone`` functions +are considered multiversioned functions. + +All multiversioned functions must contain a ``default`` (fallback) +implementation, otherwise usages of the function are considered invalid. +Additionally, a function may not become multiversioned after its first use. 
+ +The options to ``target_clones`` can either be a target-specific architecture +(specified as ``arch=CPU``), or one of a list of subtarget features. + +Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2", +"avx", "xop" and largely correspond to the machine specific options handled by +the front end. + +The versions can either be listed as a comma-separated sequence of string +literals or as a single string literal containing a comma-separated list of +versions. For compatibility with GCC, the two formats can be mixed. For +example, the following will emit 4 versions of the function: + + .. code-block:: c++ + + __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default"))) + void foo() {} + +}]; +} + def MinVectorWidthDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index b05777889e79..ad8b66aa490b 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -854,6 +854,19 @@ ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t") ATOMIC_BUILTIN(__atomic_fetch_min, "v.", "t") ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t") +// HIP atomic builtins. +ATOMIC_BUILTIN(__hip_atomic_load, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_store, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_compare_exchange_weak, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_and, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_or, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_xor, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_min, "v.", "t") +ATOMIC_BUILTIN(__hip_atomic_fetch_max, "v.", "t") + #undef ATOMIC_BUILTIN // Non-overloaded atomic builtins. diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index cd6b2df10e52..70b0184f199f 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -404,6 +404,7 @@ BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "") // P8 Crypto built-ins. BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "") +BUILTIN(__builtin_altivec_crypto_vpermxor_be, "V16UcV16UcV16UcV16Uc", "") BUILTIN(__builtin_altivec_crypto_vshasigmaw, "V4UiV4UiIiIi", "") BUILTIN(__builtin_altivec_crypto_vshasigmad, "V2ULLiV2ULLiIiIi", "") BUILTIN(__builtin_altivec_crypto_vcipher, "V2ULLiV2ULLiV2ULLi", "") @@ -424,6 +425,12 @@ BUILTIN(__builtin_altivec_vctzh, "V8UsV8Us", "") BUILTIN(__builtin_altivec_vctzw, "V4UiV4Ui", "") BUILTIN(__builtin_altivec_vctzd, "V2ULLiV2ULLi", "") +// P8 BCD builtins. 
+BUILTIN(__builtin_ppc_bcdadd, "V16UcV16UcV16UcIi", "") +BUILTIN(__builtin_ppc_bcdsub, "V16UcV16UcV16UcIi", "") +BUILTIN(__builtin_ppc_bcdadd_p, "iiV16UcV16Uc", "") +BUILTIN(__builtin_ppc_bcdsub_p, "iiV16UcV16Uc", "") + BUILTIN(__builtin_altivec_vclzlsbb, "SiV16Uc", "") BUILTIN(__builtin_altivec_vctzlsbb, "SiV16Uc", "") BUILTIN(__builtin_altivec_vprtybw, "V4UiV4Ui", "") diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index ff8c36910e13..2f50918b527b 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -189,6 +189,12 @@ def err_drv_invalid_mtp : Error< "invalid thread pointer reading mode '%0'">; def err_drv_missing_arg_mtp : Error< "missing argument to '%0'">; +def warn_drv_missing_plugin_name : Warning< + "missing plugin name in %0">, + InGroup<InvalidCommandLineArgument>; +def warn_drv_missing_plugin_arg : Warning< + "missing plugin argument for plugin %0 in %1">, + InGroup<InvalidCommandLineArgument>; def err_drv_invalid_libcxx_deployment : Error< "invalid deployment target for -stdlib=libc++ (requires %0 or later)">; def err_drv_invalid_argument_to_option : Error< @@ -394,6 +400,8 @@ def warn_ignoring_verify_debuginfo_preserve_export : Warning< InGroup<UnusedCommandLineArgument>; def err_invalid_branch_protection: Error < "invalid branch protection option '%0' in '%1'">; +def warn_unsupported_branch_protection: Warning < + "invalid branch protection option '%0' in '%1'">, InGroup<BranchProtection>; def err_invalid_sls_hardening : Error< "invalid sls hardening option '%0' in '%1'">; def err_sls_hardening_arm_not_supported : Error< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 85d373845c81..90df3a424406 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -56,7 +56,9 @@ def CoroutineMissingUnhandledException : DiagGroup<"coroutine-missing-unhandled-exception">; def DeprecatedExperimentalCoroutine : DiagGroup<"deprecated-experimental-coroutine">; -def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException, DeprecatedExperimentalCoroutine]>; +def DeprecatedCoroutine : + DiagGroup<"deprecated-coroutine", [DeprecatedExperimentalCoroutine]>; +def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException, DeprecatedCoroutine]>; def ObjCBoolConstantConversion : DiagGroup<"objc-bool-constant-conversion">; def ConstantConversion : DiagGroup<"constant-conversion", [BitFieldConstantConversion, @@ -1273,9 +1275,14 @@ def : DiagGroup<"spirv-compat", [SpirCompat]>; // Alias. // Warning for the GlobalISel options. def GlobalISel : DiagGroup<"global-isel">; +// A warning group for the GNU extension to allow mixed specifier types for +// target-clones multiversioning. +def TargetClonesMixedSpecifiers : DiagGroup<"target-clones-mixed-specifiers">; + // A warning group specifically for warnings related to function // multiversioning. 
-def FunctionMultiVersioning : DiagGroup<"function-multiversion">; +def FunctionMultiVersioning + : DiagGroup<"function-multiversion", [TargetClonesMixedSpecifiers]>; def NoDeref : DiagGroup<"noderef">; @@ -1331,3 +1338,6 @@ def PedanticMacros : DiagGroup<"pedantic-macros", BuiltinMacroRedefined, RestrictExpansionMacro, FinalMacro]>; + +def BranchProtection : DiagGroup<"branch-protection">; + diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h index aef86516707c..375930c14848 100644 --- a/clang/include/clang/Basic/DiagnosticIDs.h +++ b/clang/include/clang/Basic/DiagnosticIDs.h @@ -30,7 +30,7 @@ namespace clang { // Size of each of the diagnostic categories. enum { DIAG_SIZE_COMMON = 300, - DIAG_SIZE_DRIVER = 250, + DIAG_SIZE_DRIVER = 300, DIAG_SIZE_FRONTEND = 150, DIAG_SIZE_SERIALIZATION = 120, DIAG_SIZE_LEX = 400, diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 1bc2e8b0c7ef..92e877074ad3 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1548,6 +1548,9 @@ def note_meant_to_use_typename : Note< let CategoryName = "Coroutines Issue" in { def err_for_co_await_not_range_for : Error< "'co_await' modifier can only be applied to range-based for loop">; +def warn_deprecated_for_co_await : Warning< + "'for co_await' belongs to CoroutineTS instead of C++20, which is deprecated">, + InGroup<DeprecatedCoroutine>; } let CategoryName = "Concepts Issue" in { diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index dc67f86f25ca..fb5bd53f7432 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2979,9 +2979,13 @@ def err_attribute_requires_opencl_version : Error< "attribute %0 is supported in the OpenCL version %1%select{| onwards}2">; def err_invalid_branch_protection_spec : Error< "invalid or misplaced branch protection specification '%0'">; +def warn_unsupported_branch_protection_spec : Warning< + "unsupported branch protection specification '%0'">, InGroup<BranchProtection>; + def warn_unsupported_target_attribute : Warning<"%select{unsupported|duplicate|unknown}0%select{| architecture|" - " tune CPU}1 '%2' in the 'target' attribute string; 'target' " + " tune CPU}1 '%2' in the '%select{target|target_clones}3' " + "attribute string; '%select{target|target_clones}3' " "attribute ignored">, InGroup<IgnoredAttributes>; def err_attribute_unsupported @@ -9864,6 +9868,8 @@ def warn_duplicate_attribute_exact : Warning< def warn_duplicate_attribute : Warning< "attribute %0 is already applied with different arguments">, InGroup<IgnoredAttributes>; +def err_disallowed_duplicate_attribute : Error< + "attribute %0 cannot appear more than once on a declaration">; def warn_sync_fetch_and_nand_semantics_change : Warning< "the semantics of this intrinsic changed with GCC " @@ -11254,9 +11260,11 @@ def err_multiversion_duplicate : Error< "multiversioned function redeclarations require identical target attributes">; def err_multiversion_noproto : Error< "multiversioned function must have a prototype">; -def err_multiversion_disallowed_other_attr : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined" - " with attribute %1">; +def err_multiversion_disallowed_other_attr + : Error<"attribute " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + 
"multiversioning cannot be combined" + " with attribute %1">; def err_multiversion_mismatched_attrs : Error<"attributes on multiversioned functions must all match, attribute " "%0 %select{is missing|has different arguments}1">; @@ -11264,11 +11272,14 @@ def err_multiversion_diff : Error< "multiversioned function declaration has a different %select{calling convention" "|return type|constexpr specification|inline specification|linkage|" "language linkage}0">; -def err_multiversion_doesnt_support : Error< - "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioned functions do not " - "yet support %select{function templates|virtual functions|" - "deduced return types|constructors|destructors|deleted functions|" - "defaulted functions|constexpr functions|consteval function}1">; +def err_multiversion_doesnt_support + : Error<"attribute " + "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' " + "multiversioned functions do not " + "yet support %select{function templates|virtual functions|" + "deduced return types|constructors|destructors|deleted functions|" + "defaulted functions|constexpr functions|consteval " + "function|lambdas}1">; def err_multiversion_not_allowed_on_main : Error< "'main' cannot be a multiversioned function">; def err_multiversion_not_supported : Error< @@ -11285,6 +11296,19 @@ def warn_multiversion_duplicate_entries : Warning< def warn_dispatch_body_ignored : Warning< "body of cpu_dispatch function will be ignored">, InGroup<FunctionMultiVersioning>; +def err_target_clone_must_have_default + : Error<"'target_clones' multiversioning requires a default target">; +def err_target_clone_doesnt_match + : Error<"'target_clones' attribute does not match previous declaration">; +def warn_target_clone_mixed_values + : ExtWarn< + "mixing 'target_clones' specifier mechanisms is permitted for GCC " + "compatibility; use a comma separated sequence of string literals, " + "or a string literal containing a comma-separated list of versions">, + InGroup<TargetClonesMixedSpecifiers>; +def warn_target_clone_duplicate_options + : Warning<"version list contains duplicate entries">, + InGroup<FunctionMultiVersioning>; // three-way comparison operator diagnostics def err_implied_comparison_category_type_not_found : Error< diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h index ce8fb9cbed13..34703310af2b 100644 --- a/clang/include/clang/Basic/SyncScope.h +++ b/clang/include/clang/Basic/SyncScope.h @@ -40,6 +40,11 @@ namespace clang { /// Update getAsString. /// enum class SyncScope { + HIPSingleThread, + HIPWavefront, + HIPWorkgroup, + HIPAgent, + HIPSystem, OpenCLWorkGroup, OpenCLDevice, OpenCLAllSVMDevices, @@ -49,6 +54,16 @@ enum class SyncScope { inline llvm::StringRef getAsString(SyncScope S) { switch (S) { + case SyncScope::HIPSingleThread: + return "hip_singlethread"; + case SyncScope::HIPWavefront: + return "hip_wavefront"; + case SyncScope::HIPWorkgroup: + return "hip_workgroup"; + case SyncScope::HIPAgent: + return "hip_agent"; + case SyncScope::HIPSystem: + return "hip_system"; case SyncScope::OpenCLWorkGroup: return "opencl_workgroup"; case SyncScope::OpenCLDevice: @@ -62,7 +77,7 @@ inline llvm::StringRef getAsString(SyncScope S) { } /// Defines the kind of atomic scope models. -enum class AtomicScopeModelKind { None, OpenCL }; +enum class AtomicScopeModelKind { None, OpenCL, HIP }; /// Defines the interface for synch scope model. 
class AtomicScopeModel { @@ -138,6 +153,58 @@ public: } }; +/// Defines the synch scope model for HIP. +class AtomicScopeHIPModel : public AtomicScopeModel { +public: + /// The enum values match the pre-defined macros + /// __HIP_MEMORY_SCOPE_*, which are used to define memory_scope_* + /// enums in hip-c.h. + enum ID { + SingleThread = 1, + Wavefront = 2, + Workgroup = 3, + Agent = 4, + System = 5, + Last = System + }; + + AtomicScopeHIPModel() {} + + SyncScope map(unsigned S) const override { + switch (static_cast<ID>(S)) { + case SingleThread: + return SyncScope::HIPSingleThread; + case Wavefront: + return SyncScope::HIPWavefront; + case Workgroup: + return SyncScope::HIPWorkgroup; + case Agent: + return SyncScope::HIPAgent; + case System: + return SyncScope::HIPSystem; + } + llvm_unreachable("Invalid language synch scope value"); + } + + bool isValid(unsigned S) const override { + return S >= static_cast<unsigned>(SingleThread) && + S <= static_cast<unsigned>(Last); + } + + ArrayRef<unsigned> getRuntimeValues() const override { + static_assert(Last == System, "Does not include all synch scopes"); + static const unsigned Scopes[] = { + static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront), + static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent), + static_cast<unsigned>(System)}; + return llvm::makeArrayRef(Scopes); + } + + unsigned getFallBackValue() const override { + return static_cast<unsigned>(System); + } +}; + inline std::unique_ptr<AtomicScopeModel> AtomicScopeModel::create(AtomicScopeModelKind K) { switch (K) { @@ -145,9 +212,11 @@ AtomicScopeModel::create(AtomicScopeModelKind K) { return std::unique_ptr<AtomicScopeModel>{}; case AtomicScopeModelKind::OpenCL: return std::make_unique<AtomicScopeOpenCLModel>(); + case AtomicScopeModelKind::HIP: + return std::make_unique<AtomicScopeHIPModel>(); } llvm_unreachable("Invalid atomic scope model kind"); } -} +} // namespace clang #endif diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9bde64cf49fd..4e6dd2050344 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2504,6 +2504,9 @@ defm rwpi : BoolFOption<"rwpi", NegFlag<SetFalse>>; def fplugin_EQ : Joined<["-"], "fplugin=">, Group<f_Group>, Flags<[NoXarchOption]>, MetaVarName<"<dsopath>">, HelpText<"Load the named plugin (dynamic shared object)">; +def fplugin_arg : Joined<["-"], "fplugin-arg-">, + MetaVarName<"<name>-<arg>">, + HelpText<"Pass <arg> to plugin <name>">; def fpass_plugin_EQ : Joined<["-"], "fpass-plugin=">, Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<dsopath>">, HelpText<"Load pass plugin from a dynamic shared object file (only with new pass manager).">, @@ -2786,10 +2789,11 @@ def fvisibility_ms_compat : Flag<["-"], "fvisibility-ms-compat">, Group<f_Group> def fvisibility_global_new_delete_hidden : Flag<["-"], "fvisibility-global-new-delete-hidden">, Group<f_Group>, HelpText<"Give global C++ operator new and delete declarations hidden visibility">, Flags<[CC1Option]>, MarshallingInfoFlag<LangOpts<"GlobalAllocationFunctionVisibilityHidden">>; -def fnew_infallible : Flag<["-"], "fnew-infallible">, Group<f_Group>, - HelpText<"Treats throwing global C++ operator new as always returning valid memory " - "(annotates with __attribute__((returns_nonnull)) and throw()). 
This is detectable in source.">, - Flags<[CC1Option]>, MarshallingInfoFlag<LangOpts<"NewInfallible">>; +defm new_infallible : BoolFOption<"new-infallible", + LangOpts<"NewInfallible">, DefaultFalse, + PosFlag<SetTrue, [], "Enable">, NegFlag<SetFalse, [], "Disable">, + BothFlags<[CC1Option], " treating throwing global C++ operator new as always returning valid memory " + "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">>; defm whole_program_vtables : BoolFOption<"whole-program-vtables", CodeGenOpts<"WholeProgramVTables">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Enables whole-program vtable optimization. Requires -flto">, @@ -4519,7 +4523,7 @@ def frecord_marker_EQ : Joined<["-"], "frecord-marker=">, Group<gfortran_Group>; defm aggressive_function_elimination : BooleanFFlag<"aggressive-function-elimination">, Group<gfortran_Group>; defm align_commons : BooleanFFlag<"align-commons">, Group<gfortran_Group>; defm all_intrinsics : BooleanFFlag<"all-intrinsics">, Group<gfortran_Group>; -defm automatic : BooleanFFlag<"automatic">, Group<gfortran_Group>; +def fautomatic : Flag<["-"], "fautomatic">; // -fno-automatic is significant defm backtrace : BooleanFFlag<"backtrace">, Group<gfortran_Group>; defm bounds_check : BooleanFFlag<"bounds-check">, Group<gfortran_Group>; defm check_array_temporaries : BooleanFFlag<"check-array-temporaries">, Group<gfortran_Group>; @@ -4616,6 +4620,9 @@ defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string in defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">; defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">; + +def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>, + HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; } def J : JoinedOrSeparate<["-"], "J">, @@ -5059,9 +5066,10 @@ def msmall_data_limit : Separate<["-"], "msmall-data-limit">, def funwind_tables_EQ : Joined<["-"], "funwind-tables=">, HelpText<"Generate unwinding tables for all functions">, MarshallingInfoInt<CodeGenOpts<"UnwindTables">>; -def mconstructor_aliases : Flag<["-"], "mconstructor-aliases">, - HelpText<"Emit complete constructors and destructors as aliases when possible">, - MarshallingInfoFlag<CodeGenOpts<"CXXCtorDtorAliases">>; +defm constructor_aliases : BoolOption<"m", "constructor-aliases", + CodeGenOpts<"CXXCtorDtorAliases">, DefaultFalse, + PosFlag<SetTrue, [], "Enable">, NegFlag<SetFalse, [], "Disable">, + BothFlags<[CC1Option], " emitting complete constructors and destructors as aliases when possible">>; def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, @@ -5174,10 +5182,6 @@ defm debug_pass_manager : BoolOption<"f", "debug-pass-manager", CodeGenOpts<"DebugPassManager">, DefaultFalse, PosFlag<SetTrue, [], "Prints debug information for the new pass manager">, NegFlag<SetFalse, [], "Disables debug printing for the new pass manager">>; -def fexperimental_debug_variable_locations : Flag<["-"], - "fexperimental-debug-variable-locations">, - HelpText<"Use experimental new value-tracking variable locations">, - MarshallingInfoFlag<CodeGenOpts<"ValueTrackingVariableLocations">>; def 
fverify_debuginfo_preserve : Flag<["-"], "fverify-debuginfo-preserve">, HelpText<"Enable Debug Info Metadata preservation testing in " diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index bb7fd97fe5df..dacbffef0b12 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -274,7 +274,7 @@ class PreambleCallbacks { public: virtual ~PreambleCallbacks() = default; - /// Called before FrontendAction::BeginSourceFile. + /// Called before FrontendAction::Execute. /// Can be used to store references to various CompilerInstance fields /// (e.g. SourceManager) that may be interesting to the consumers of other /// callbacks. @@ -291,7 +291,7 @@ public: /// used instead, but having only this method allows a simpler API. virtual void HandleTopLevelDecl(DeclGroupRef DG); /// Creates wrapper class for PPCallbacks so we can also process information - /// about includes that are inside of a preamble + /// about includes that are inside of a preamble. Called after BeforeExecute. virtual std::unique_ptr<PPCallbacks> createPPCallbacks(); /// The returned CommentHandler will be added to the preprocessor if not null. virtual CommentHandler *getCommentHandler(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 43ce5d983217..1a82a9498d1d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1296,6 +1296,11 @@ public: EK_Decltype, EK_TemplateArgument, EK_Other } ExprContext; + // A context can be nested in both a discarded statement context and + // an immediate function context, so they need to be tracked independently. + bool InDiscardedStatement; + bool InImmediateFunctionContext; + ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -1303,7 +1308,8 @@ public: ExpressionKind ExprContext) : Context(Context), ParentCleanup(ParentCleanup), NumCleanupObjects(NumCleanupObjects), NumTypos(0), - ManglingContextDecl(ManglingContextDecl), ExprContext(ExprContext) {} + ManglingContextDecl(ManglingContextDecl), ExprContext(ExprContext), + InDiscardedStatement(false), InImmediateFunctionContext(false) {} bool isUnevaluated() const { return Context == ExpressionEvaluationContext::Unevaluated || @@ -1317,7 +1323,13 @@ public: } bool isImmediateFunctionContext() const { - return Context == ExpressionEvaluationContext::ImmediateFunctionContext; + return Context == ExpressionEvaluationContext::ImmediateFunctionContext || + InImmediateFunctionContext; + } + + bool isDiscardedStatementContext() const { + return Context == ExpressionEvaluationContext::DiscardedStatement || + InDiscardedStatement; } }; @@ -4351,6 +4363,10 @@ public: llvm::Error isValidSectionSpecifier(StringRef Str); bool checkSectionName(SourceLocation LiteralLoc, StringRef Str); bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str); + bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, + const StringLiteral *Literal, + bool &HasDefault, bool &HasCommas, + SmallVectorImpl<StringRef> &Strings); bool checkMSInheritanceAttrOnDefinition( CXXRecordDecl *RD, SourceRange Range, bool BestCase, MSInheritanceModel SemanticSpelling); @@ -9150,14 +9166,7 @@ public: bool isImmediateFunctionContext() const { assert(!ExprEvalContexts.empty() && "Must be in an expression evaluation context"); - for (const ExpressionEvaluationContextRecord &context : - 
llvm::reverse(ExprEvalContexts)) { - if (context.isImmediateFunctionContext()) - return true; - if (context.isUnevaluated()) - return false; - } - return false; + return ExprEvalContexts.back().isImmediateFunctionContext(); } /// RAII class used to determine whether SFINAE has diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index c52da3305f7c..af02fa2e7e87 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -195,15 +195,23 @@ public: llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> openFileForRead(const Twine &Path) override; - void clearIgnoredFiles() { IgnoredFiles.clear(); } - void ignoreFile(StringRef Filename); + /// Disable minimization of the given file. + void disableMinimization(StringRef Filename); + /// Enable minimization of all files. + void enableMinimizationOfAllFiles() { NotToBeMinimized.clear(); } private: - bool shouldIgnoreFile(StringRef Filename); + /// Check whether the file should be minimized. + bool shouldMinimize(StringRef Filename); llvm::ErrorOr<const CachedFileSystemEntry *> getOrCreateFileSystemEntry(const StringRef Filename); + /// Create a cached file system entry based on the initial status result. + CachedFileSystemEntry + createFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, + StringRef Filename, bool ShouldMinimize); + /// The global cache shared between worker threads. DependencyScanningFilesystemSharedCache &SharedCache; /// The local cache is used by the worker thread to cache file system queries @@ -214,7 +222,7 @@ private: /// currently active preprocessor. ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings; /// The set of files that should not be minimized. - llvm::StringSet<> IgnoredFiles; + llvm::StringSet<> NotToBeMinimized; }; } // end namespace dependencies diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 294cc20f76c5..2d85d72e5b8a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -11800,6 +11800,15 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (const auto *TC = FD->getAttr<TargetClonesAttr>()) { + std::vector<std::string> Features; + StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); + if (VersionStr.startswith("arch=")) + TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); + else if (VersionStr != "default") + Features.push_back((StringRef{"+"} + VersionStr).str()); + + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else { FeatureMap = Target->getTargetOpts().FeatureMap; } diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index e85feb779190..7fd24e2aa9ad 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1347,6 +1347,42 @@ IsStructurallyEquivalentLambdas(StructuralEquivalenceContext &Context, return true; } +/// Determine if context of a class is equivalent. +static bool IsRecordContextStructurallyEquivalent(RecordDecl *D1, + RecordDecl *D2) { + // The context should be completely equal, including anonymous and inline + // namespaces. 
+ // We compare objects as part of full translation units, not subtrees of + // translation units. + DeclContext *DC1 = D1->getDeclContext()->getNonTransparentContext(); + DeclContext *DC2 = D2->getDeclContext()->getNonTransparentContext(); + while (true) { + // Special case: We allow a struct defined in a function to be equivalent + // with a similar struct defined outside of a function. + if ((DC1->isFunctionOrMethod() && DC2->isTranslationUnit()) || + (DC2->isFunctionOrMethod() && DC1->isTranslationUnit())) + return true; + + if (DC1->getDeclKind() != DC2->getDeclKind()) + return false; + if (DC1->isTranslationUnit()) + break; + if (DC1->isInlineNamespace() != DC2->isInlineNamespace()) + return false; + if (const auto *ND1 = dyn_cast<NamedDecl>(DC1)) { + const auto *ND2 = cast<NamedDecl>(DC2); + if (!DC1->isInlineNamespace() && + !IsStructurallyEquivalent(ND1->getIdentifier(), ND2->getIdentifier())) + return false; + } + + DC1 = DC1->getParent()->getNonTransparentContext(); + DC2 = DC2->getParent()->getNonTransparentContext(); + } + + return true; +} + /// Determine structural equivalence of two records. static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, RecordDecl *D1, RecordDecl *D2) { @@ -1386,6 +1422,12 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, } } + // If the records occur in different context (namespace), these should be + // different. This is specially important if the definition of one or both + // records is missing. + if (!IsRecordContextStructurallyEquivalent(D1, D2)) + return false; + // If both declarations are class template specializations, we know // the ODR applies, so check the template and template arguments. const auto *Spec1 = dyn_cast<ClassTemplateSpecializationDecl>(D1); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 5ea091edcf4c..68dfef248f65 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3271,6 +3271,8 @@ MultiVersionKind FunctionDecl::getMultiVersionKind() const { return MultiVersionKind::CPUDispatch; if (hasAttr<CPUSpecificAttr>()) return MultiVersionKind::CPUSpecific; + if (hasAttr<TargetClonesAttr>()) + return MultiVersionKind::TargetClones; return MultiVersionKind::None; } @@ -3286,6 +3288,10 @@ bool FunctionDecl::isTargetMultiVersion() const { return isMultiVersion() && hasAttr<TargetAttr>(); } +bool FunctionDecl::isTargetClonesMultiVersion() const { + return isMultiVersion() && hasAttr<TargetClonesAttr>(); +} + void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 4044404f74ef..064012ba865c 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -964,7 +964,7 @@ SourceLocation Decl::getBodyRBrace() const { return {}; } -bool Decl::AccessDeclContextSanity() const { +bool Decl::AccessDeclContextCheck() const { #ifndef NDEBUG // Suppress this check if any of the following hold: // 1. 
this is the translation unit (and thus has no parent) @@ -1212,7 +1212,7 @@ bool DeclContext::Encloses(const DeclContext *DC) const { return getPrimaryContext()->Encloses(DC); for (; DC; DC = DC->getParent()) - if (DC->getPrimaryContext() == this) + if (!isa<LinkageSpecDecl>(DC) && DC->getPrimaryContext() == this) return true; return false; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 7bd3dce43f4d..d3cb2ff3734c 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4681,6 +4681,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { return 2; case AO__opencl_atomic_load: + case AO__hip_atomic_load: case AO__c11_atomic_store: case AO__c11_atomic_exchange: case AO__atomic_load: @@ -4713,7 +4714,15 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { case AO__atomic_fetch_max: return 3; + case AO__hip_atomic_exchange: + case AO__hip_atomic_fetch_add: + case AO__hip_atomic_fetch_and: + case AO__hip_atomic_fetch_or: + case AO__hip_atomic_fetch_xor: + case AO__hip_atomic_fetch_min: + case AO__hip_atomic_fetch_max: case AO__opencl_atomic_store: + case AO__hip_atomic_store: case AO__opencl_atomic_exchange: case AO__opencl_atomic_fetch_add: case AO__opencl_atomic_fetch_sub: @@ -4728,9 +4737,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) { case AO__c11_atomic_compare_exchange_strong: case AO__c11_atomic_compare_exchange_weak: return 5; - + case AO__hip_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_strong: case AO__opencl_atomic_compare_exchange_weak: + case AO__hip_atomic_compare_exchange_weak: case AO__atomic_compare_exchange: case AO__atomic_compare_exchange_n: return 6; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index fe96db9ca918..99babd58b027 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7483,7 +7483,7 @@ public: const Expr *Source = E->getSourceExpr(); if (!Source) return Error(E); - if (Source == E) { // sanity checking. + if (Source == E) { assert(0 && "OpaqueValueExpr recursively refers to itself"); return Error(E); } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 163d4e95386e..79a448a2435c 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" #include "clang/AST/VTableBuilder.h" #include "clang/Basic/ABI.h" @@ -39,6 +40,18 @@ using namespace clang; namespace { +// Get GlobalDecl of DeclContext of local entities. 
+static GlobalDecl getGlobalDeclAsDeclContext(const DeclContext *DC) { + GlobalDecl GD; + if (auto *CD = dyn_cast<CXXConstructorDecl>(DC)) + GD = GlobalDecl(CD, Ctor_Complete); + else if (auto *DD = dyn_cast<CXXDestructorDecl>(DC)) + GD = GlobalDecl(DD, Dtor_Complete); + else + GD = GlobalDecl(cast<FunctionDecl>(DC)); + return GD; +} + struct msvc_hashing_ostream : public llvm::raw_svector_ostream { raw_ostream &OS; llvm::SmallString<64> Buffer; @@ -345,9 +358,9 @@ public: raw_ostream &getStream() const { return Out; } - void mangle(const NamedDecl *D, StringRef Prefix = "?"); - void mangleName(const NamedDecl *ND); - void mangleFunctionEncoding(const FunctionDecl *FD, bool ShouldMangle); + void mangle(GlobalDecl GD, StringRef Prefix = "?"); + void mangleName(GlobalDecl GD); + void mangleFunctionEncoding(GlobalDecl GD, bool ShouldMangle); void mangleVariableEncoding(const VarDecl *VD); void mangleMemberDataPointer(const CXXRecordDecl *RD, const ValueDecl *VD, StringRef Prefix = "$"); @@ -370,7 +383,7 @@ public: const FunctionDecl *D = nullptr, bool ForceThisQuals = false, bool MangleExceptionSpec = true); - void mangleNestedName(const NamedDecl *ND); + void mangleNestedName(GlobalDecl GD); private: bool isStructorDecl(const NamedDecl *ND) const { @@ -384,10 +397,10 @@ private: AddrSpace == LangAS::ptr32_uptr)); } - void mangleUnqualifiedName(const NamedDecl *ND) { - mangleUnqualifiedName(ND, ND->getDeclName()); + void mangleUnqualifiedName(GlobalDecl GD) { + mangleUnqualifiedName(GD, cast<NamedDecl>(GD.getDecl())->getDeclName()); } - void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name); + void mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name); void mangleSourceName(StringRef Name); void mangleOperatorName(OverloadedOperatorKind OO, SourceLocation Loc); void mangleCXXDtorType(CXXDtorType T); @@ -396,9 +409,9 @@ private: void manglePointerCVQualifiers(Qualifiers Quals); void manglePointerExtQualifiers(Qualifiers Quals, QualType PointeeType); - void mangleUnscopedTemplateName(const TemplateDecl *ND); + void mangleUnscopedTemplateName(GlobalDecl GD); void - mangleTemplateInstantiationName(const TemplateDecl *TD, + mangleTemplateInstantiationName(GlobalDecl GD, const TemplateArgumentList &TemplateArgs); void mangleObjCMethodName(const ObjCMethodDecl *MD); @@ -533,7 +546,8 @@ MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) { return true; } -void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { +void MicrosoftCXXNameMangler::mangle(GlobalDecl GD, StringRef Prefix) { + const NamedDecl *D = cast<NamedDecl>(GD.getDecl()); // MSVC doesn't mangle C++ names the same way it mangles extern "C" names. // Therefore it's really important that we don't decorate the // name with leading underscores or leading/trailing at signs. So, by @@ -542,9 +556,9 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { // <mangled-name> ::= ? 
<name> <type-encoding> Out << Prefix; - mangleName(D); + mangleName(GD); if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) - mangleFunctionEncoding(FD, Context.shouldMangleDeclName(FD)); + mangleFunctionEncoding(GD, Context.shouldMangleDeclName(FD)); else if (const VarDecl *VD = dyn_cast<VarDecl>(D)) mangleVariableEncoding(VD); else if (isa<MSGuidDecl>(D)) @@ -558,8 +572,9 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { llvm_unreachable("Tried to mangle unexpected NamedDecl!"); } -void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD, +void MicrosoftCXXNameMangler::mangleFunctionEncoding(GlobalDecl GD, bool ShouldMangle) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); // <type-encoding> ::= <function-class> <function-type> // Since MSVC operates on the type as written and not the canonical type, it @@ -770,13 +785,13 @@ void MicrosoftCXXNameMangler::mangleVirtualMemPtrThunk( mangleCallingConvention(MD->getType()->castAs<FunctionProtoType>()); } -void MicrosoftCXXNameMangler::mangleName(const NamedDecl *ND) { +void MicrosoftCXXNameMangler::mangleName(GlobalDecl GD) { // <name> ::= <unscoped-name> {[<named-scope>]+ | [<nested-name>]}? @ // Always start with the unqualified name. - mangleUnqualifiedName(ND); + mangleUnqualifiedName(GD); - mangleNestedName(ND); + mangleNestedName(GD); // Terminate the whole name with an '@'. Out << '@'; @@ -844,13 +859,14 @@ void MicrosoftCXXNameMangler::mangleBits(llvm::APInt Value) { } } -static const TemplateDecl * -isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) { +static GlobalDecl isTemplate(GlobalDecl GD, + const TemplateArgumentList *&TemplateArgs) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // Check if we have a function template. if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) { if (const TemplateDecl *TD = FD->getPrimaryTemplate()) { TemplateArgs = FD->getTemplateSpecializationArgs(); - return TD; + return GD.getWithDecl(TD); } } @@ -858,21 +874,22 @@ isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) { if (const ClassTemplateSpecializationDecl *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ND)) { TemplateArgs = &Spec->getTemplateArgs(); - return Spec->getSpecializedTemplate(); + return GD.getWithDecl(Spec->getSpecializedTemplate()); } // Check if we have a variable template. if (const VarTemplateSpecializationDecl *Spec = dyn_cast<VarTemplateSpecializationDecl>(ND)) { TemplateArgs = &Spec->getTemplateArgs(); - return Spec->getSpecializedTemplate(); + return GD.getWithDecl(Spec->getSpecializedTemplate()); } - return nullptr; + return GlobalDecl(); } -void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, +void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); // <unqualified-name> ::= <operator-name> // ::= <ctor-dtor-name> // ::= <source-name> @@ -880,11 +897,11 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, // Check if we have a template. const TemplateArgumentList *TemplateArgs = nullptr; - if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) { + if (GlobalDecl TD = isTemplate(GD, TemplateArgs)) { // Function templates aren't considered for name back referencing. This // makes sense since function templates aren't likely to occur multiple // times in a symbol. 
- if (isa<FunctionTemplateDecl>(TD)) { + if (isa<FunctionTemplateDecl>(TD.getDecl())) { mangleTemplateInstantiationName(TD, *TemplateArgs); Out << '@'; return; @@ -945,7 +962,19 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, switch (Name.getNameKind()) { case DeclarationName::Identifier: { if (const IdentifierInfo *II = Name.getAsIdentifierInfo()) { - mangleSourceName(II->getName()); + bool IsDeviceStub = + ND && + ((isa<FunctionDecl>(ND) && ND->hasAttr<CUDAGlobalAttr>()) || + (isa<FunctionTemplateDecl>(ND) && + cast<FunctionTemplateDecl>(ND) + ->getTemplatedDecl() + ->hasAttr<CUDAGlobalAttr>())) && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub; + if (IsDeviceStub) + mangleSourceName( + (llvm::Twine("__device_stub__") + II->getName()).str()); + else + mangleSourceName(II->getName()); break; } @@ -1146,7 +1175,8 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, // <postfix> ::= <unqualified-name> [<postfix>] // ::= <substitution> [<postfix>] -void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) { +void MicrosoftCXXNameMangler::mangleNestedName(GlobalDecl GD) { + const NamedDecl *ND = cast<NamedDecl>(GD.getDecl()); const DeclContext *DC = getEffectiveDeclContext(ND); while (!DC->isTranslationUnit()) { if (isa<TagDecl>(ND) || isa<VarDecl>(ND)) { @@ -1229,7 +1259,7 @@ void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) { } else if (isa<NamedDecl>(DC)) { ND = cast<NamedDecl>(DC); if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) { - mangle(FD, "?"); + mangle(getGlobalDeclAsDeclContext(FD), "?"); break; } else { mangleUnqualifiedName(ND); @@ -1418,7 +1448,7 @@ void MicrosoftCXXNameMangler::mangleObjCMethodName(const ObjCMethodDecl *MD) { } void MicrosoftCXXNameMangler::mangleTemplateInstantiationName( - const TemplateDecl *TD, const TemplateArgumentList &TemplateArgs) { + GlobalDecl GD, const TemplateArgumentList &TemplateArgs) { // <template-name> ::= <unscoped-template-name> <template-args> // ::= <substitution> // Always start with the unqualified name. @@ -1433,8 +1463,8 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName( TemplateArgBackReferences.swap(OuterTemplateArgsContext); PassObjectSizeArgs.swap(OuterPassObjectSizeArgs); - mangleUnscopedTemplateName(TD); - mangleTemplateArgs(TD, TemplateArgs); + mangleUnscopedTemplateName(GD); + mangleTemplateArgs(cast<TemplateDecl>(GD.getDecl()), TemplateArgs); // Restore the previous back reference contexts. 
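To make the device-stub case above concrete: when a CUDA or HIP translation unit is compiled for a Microsoft-ABI host, the host-side stub of a kernel is expected to be mangled with a "__device_stub__" prefix on its source name, so the stub and the kernel handle no longer collide. A minimal illustration (the exact mangled strings are indicative of the scheme above, not quoted from observed output):

// kernel.cu, host-side compilation for an *-windows-msvc triple
__global__ void kern(int *p) { *p = 42; }

template <typename T>
__global__ void tkern(T *p) { *p = T(); }

// Per the hunk above, the stub for kern mangles its unqualified name as
// "__device_stub__kern" (roughly ?__device_stub__kern@@YAXPEAH@Z), while the
// kernel handle keeps the plain source name; the FunctionTemplateDecl branch
// covers stubs of instantiations such as tkern<float>.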
NameBackReferences.swap(OuterTemplateContext); @@ -1443,11 +1473,10 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName( PassObjectSizeArgs.swap(OuterPassObjectSizeArgs); } -void -MicrosoftCXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *TD) { +void MicrosoftCXXNameMangler::mangleUnscopedTemplateName(GlobalDecl GD) { // <unscoped-template-name> ::= ?$ <unqualified-name> Out << "?$"; - mangleUnqualifiedName(TD); + mangleUnqualifiedName(GD); } void MicrosoftCXXNameMangler::mangleIntegerLiteral( @@ -3323,17 +3352,17 @@ void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD, if (auto *CD = dyn_cast<CXXConstructorDecl>(D)) { auto Type = GD.getCtorType(); MicrosoftCXXNameMangler mangler(*this, MHO, CD, Type); - return mangler.mangle(D); + return mangler.mangle(GD); } if (auto *DD = dyn_cast<CXXDestructorDecl>(D)) { auto Type = GD.getDtorType(); MicrosoftCXXNameMangler mangler(*this, MHO, DD, Type); - return mangler.mangle(D); + return mangler.mangle(GD); } MicrosoftCXXNameMangler Mangler(*this, MHO); - return Mangler.mangle(D); + return Mangler.mangle(GD); } void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers, diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index fc267d7006a1..b65a38d1e566 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1691,7 +1691,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) { PrintExpr(Node->getPtr()); if (Node->getOp() != AtomicExpr::AO__c11_atomic_load && Node->getOp() != AtomicExpr::AO__atomic_load_n && - Node->getOp() != AtomicExpr::AO__opencl_atomic_load) { + Node->getOp() != AtomicExpr::AO__opencl_atomic_load && + Node->getOp() != AtomicExpr::AO__hip_atomic_load) { OS << ", "; PrintExpr(Node->getVal1()); } diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp new file mode 100644 index 000000000000..bb7eb9971068 --- /dev/null +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -0,0 +1,35 @@ +//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines type-erased base types and functions for building dataflow +// analyses that run over Control-Flow Graphs (CFGs). +// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "clang/Analysis/CFG.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" +#include "llvm/ADT/Optional.h" + +using namespace clang; +using namespace dataflow; + +std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> +runTypeErasedDataflowAnalysis(const CFG &Cfg, + TypeErasedDataflowAnalysis &Analysis, + const Environment &InitEnv) { + // FIXME: Consider enforcing that `Cfg` meets the requirements that + // are specified in the header. This could be done by remembering + // what options were used to build `Cfg` and asserting on them here. + + // FIXME: Implement work list-based algorithm to compute the fixed + // point of `Analysis::transform` for every basic block in `Cfg`. 
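The FIXMEs above leave the fixed-point computation unimplemented. As a rough sketch of the worklist algorithm they call for, using simplified stand-in types rather than the real CFG/TypeErasedDataflowAnalysis interfaces (all names below are illustrative only):

#include <deque>
#include <vector>

// Toy lattice element; a real analysis would carry an Environment here.
struct State {
  bool Tainted = false;
  bool operator==(const State &O) const { return Tainted == O.Tainted; }
};

struct Block {
  std::vector<unsigned> Preds, Succs;
};

struct Analysis {
  State initial() const { return {}; }
  // Join must be monotone for the iteration to terminate.
  State join(State A, State B) const { return {A.Tainted || B.Tainted}; }
  // Transfer function for a single basic block (identity in this sketch).
  State transfer(const Block &, State In) const { return In; }
};

std::vector<State> runToFixedPoint(const std::vector<Block> &Cfg,
                                   const Analysis &A) {
  std::vector<State> Out(Cfg.size(), A.initial());
  std::deque<unsigned> Worklist;
  for (unsigned I = 0; I < Cfg.size(); ++I)
    Worklist.push_back(I);
  while (!Worklist.empty()) {
    unsigned B = Worklist.front();
    Worklist.pop_front();
    // Merge predecessor out-states, apply the transfer function, and
    // re-queue successors whenever this block's out-state changes.
    State In = A.initial();
    for (unsigned P : Cfg[B].Preds)
      In = A.join(In, Out[P]);
    State NewOut = A.transfer(Cfg[B], In);
    if (!(NewOut == Out[B])) {
      Out[B] = NewOut;
      for (unsigned S : Cfg[B].Succs)
        Worklist.push_back(S);
    }
  }
  return Out;
}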
+ return {}; +} diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index f75b8ffcb53d..4d403ae1809d 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -140,8 +140,8 @@ bool AArch64TargetInfo::setABI(const std::string &Name) { bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, BranchProtectionInfo &BPI, StringRef &Err) const { - llvm::AArch64::ParsedBranchProtection PBP; - if (!llvm::AArch64::parseBranchProtection(Spec, PBP, Err)) + llvm::ARM::ParsedBranchProtection PBP; + if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) return false; BPI.SignReturnAddr = diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index fc6b01c87fd2..f330780300f2 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -367,6 +367,28 @@ bool ARMTargetInfo::setABI(const std::string &Name) { return false; } +bool ARMTargetInfo::validateBranchProtection(StringRef Spec, + BranchProtectionInfo &BPI, + StringRef &Err) const { + llvm::ARM::ParsedBranchProtection PBP; + if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) + return false; + + BPI.SignReturnAddr = + llvm::StringSwitch<LangOptions::SignReturnAddressScopeKind>(PBP.Scope) + .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf) + .Case("all", LangOptions::SignReturnAddressScopeKind::All) + .Default(LangOptions::SignReturnAddressScopeKind::None); + + // Don't care for the sign key, beyond issuing a warning. + if (PBP.Key == "b_key") + Err = "b-key"; + BPI.SignKey = LangOptions::SignReturnAddressKeyKind::AKey; + + BPI.BranchTargetEnforcement = PBP.BranchTargetEnforcement; + return true; +} + // FIXME: This should be based on Arch attributes, not CPU names. bool ARMTargetInfo::initFeatureMap( llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, @@ -874,6 +896,16 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1"); } + if (Opts.BranchTargetEnforcement) + Builder.defineMacro("__ARM_FEATURE_BTI_DEFAULT", "1"); + + if (Opts.hasSignReturnAddress()) { + unsigned Value = Opts.isSignReturnAddressWithAKey() ? 1 : 2; + if (Opts.isSignReturnAddressScopeAll()) + Value |= 1 << 2; + Builder.defineMacro("__ARM_FEATURE_PAC_DEFAULT", Twine(Value)); + } + switch (ArchKind) { default: break; diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index d54a049042d6..7d0011d134ea 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -123,6 +123,9 @@ public: StringRef getABI() const override; bool setABI(const std::string &Name) override; + bool validateBranchProtection(StringRef, BranchProtectionInfo &, + StringRef &) const override; + // FIXME: This should be based on Arch attributes, not CPU names. 
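With the predefines above, __ARM_FEATURE_PAC_DEFAULT encodes the signing key in its low bits (1 for the A key, 2 for the B key) and sets bit 2 when leaf functions are signed as well, while __ARM_FEATURE_BTI_DEFAULT is 1 whenever branch-target enforcement is enabled. A small consumer, assuming the usual -mbranch-protection= driver spellings (which are not part of this hunk):

// e.g. clang --target=arm-none-eabi -mbranch-protection=pac-ret+leaf+bti -c pac.c
#include <stdio.h>

int main(void) {
#ifdef __ARM_FEATURE_PAC_DEFAULT
  // pac-ret+leaf with the (implicit) A key -> 1 | (1 << 2) == 5
  printf("PAC default: %d\n", __ARM_FEATURE_PAC_DEFAULT);
#endif
#ifdef __ARM_FEATURE_BTI_DEFAULT
  printf("BTI default: %d\n", __ARM_FEATURE_BTI_DEFAULT); // 1 when BTI is on
#endif
  return 0;
}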
bool initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, diff --git a/clang/lib/Basic/Targets/OSTargets.cpp b/clang/lib/Basic/Targets/OSTargets.cpp index 7cd4a5190120..53748bf067cd 100644 --- a/clang/lib/Basic/Targets/OSTargets.cpp +++ b/clang/lib/Basic/Targets/OSTargets.cpp @@ -181,8 +181,10 @@ static void addVisualCDefines(const LangOptions &Opts, MacroBuilder &Builder) { Builder.defineMacro("_HAS_CHAR16_T_LANGUAGE_SUPPORT", Twine(1)); if (Opts.isCompatibleWithMSVC(LangOptions::MSVC2015)) { - if (Opts.CPlusPlus20) - Builder.defineMacro("_MSVC_LANG", "201705L"); + if (Opts.CPlusPlus2b) + Builder.defineMacro("_MSVC_LANG", "202004L"); + else if (Opts.CPlusPlus20) + Builder.defineMacro("_MSVC_LANG", "202002L"); else if (Opts.CPlusPlus17) Builder.defineMacro("_MSVC_LANG", "201703L"); else if (Opts.CPlusPlus14) @@ -201,6 +203,14 @@ static void addVisualCDefines(const LangOptions &Opts, MacroBuilder &Builder) { } Builder.defineMacro("_INTEGRAL_MAX_BITS", "64"); + + // Starting with VS 2022 17.1, MSVC predefines the below macro to inform + // users of the execution character set defined at compile time. + // The value given is the Windows Code Page Identifier: + // https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers + // + // Clang currently only supports UTF-8, so we'll use 65001 + Builder.defineMacro("_MSVC_EXECUTION_CHARACTER_SET", "65001"); } void addWindowsDefines(const llvm::Triple &Triple, const LangOptions &Opts, diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 704b1843dfed..8cf18b6c20f1 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -56,9 +56,14 @@ static const unsigned SPIRDefIsGenMap[] = { 0, // opencl_generic 0, // opencl_global_device 0, // opencl_global_host - 0, // cuda_device - 0, // cuda_constant - 0, // cuda_shared + // cuda_* address space mapping is intended for HIPSPV (HIP to SPIR-V + // translation). This mapping is enabled when the language mode is HIP. + 1, // cuda_device + // cuda_constant pointer can be casted to default/"flat" pointer, but in + // SPIR-V casts between constant and generic pointers are not allowed. For + // this reason cuda_constant is mapped to SPIR-V CrossWorkgroup. + 1, // cuda_constant + 3, // cuda_shared 1, // sycl_global 5, // sycl_global_device 6, // sycl_global_host @@ -74,6 +79,8 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { protected: BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { + assert((Triple.isSPIR() || Triple.isSPIRV()) && + "Invalid architecture for SPIR or SPIR-V."); assert(getTriple().getOS() == llvm::Triple::UnknownOS && "SPIR(-V) target must use unknown OS"); assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && @@ -137,11 +144,16 @@ public: // FIXME: SYCL specification considers unannotated pointers and references // to be pointing to the generic address space. See section 5.9.3 of // SYCL 2020 specification. - // Currently, there is no way of representing SYCL's default address space - // language semantic along with the semantics of embedded C's default - // address space in the same address space map. Hence the map needs to be - // reset to allow mapping to the desired value of 'Default' entry for SYCL. 
- setAddressSpaceMap(/*DefaultIsGeneric=*/Opts.SYCLIsDevice); + // Currently, there is no way of representing SYCL's and HIP's default + // address space language semantic along with the semantics of embedded C's + // default address space in the same address space map. Hence the map needs + // to be reset to allow mapping to the desired value of 'Default' entry for + // SYCL and HIP. + setAddressSpaceMap( + /*DefaultIsGeneric=*/Opts.SYCLIsDevice || + // The address mapping from HIP language for device code is only defined + // for SPIR-V. + (getTriple().isSPIRV() && Opts.HIP && Opts.CUDAIsDevice)); } void setSupportedOpenCLOpts() override { @@ -159,6 +171,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public BaseSPIRTargetInfo { public: SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : BaseSPIRTargetInfo(Triple, Opts) { + assert(Triple.isSPIR() && "Invalid architecture for SPIR."); assert(getTriple().getOS() == llvm::Triple::UnknownOS && "SPIR target must use unknown OS"); assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && @@ -177,6 +190,8 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { public: SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : SPIRTargetInfo(Triple, Opts) { + assert(Triple.getArch() == llvm::Triple::spir && + "Invalid architecture for 32-bit SPIR."); PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; @@ -192,6 +207,8 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo { public: SPIR64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : SPIRTargetInfo(Triple, Opts) { + assert(Triple.getArch() == llvm::Triple::spir64 && + "Invalid architecture for 64-bit SPIR."); PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; @@ -207,6 +224,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRTargetInfo { public: SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : BaseSPIRTargetInfo(Triple, Opts) { + assert(Triple.isSPIRV() && "Invalid architecture for SPIR-V."); assert(getTriple().getOS() == llvm::Triple::UnknownOS && "SPIR-V target must use unknown OS"); assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && @@ -225,6 +243,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public SPIRVTargetInfo { public: SPIRV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : SPIRVTargetInfo(Triple, Opts) { + assert(Triple.getArch() == llvm::Triple::spirv32 && + "Invalid architecture for 32-bit SPIR-V."); PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; @@ -240,6 +260,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public SPIRVTargetInfo { public: SPIRV64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : SPIRVTargetInfo(Triple, Opts) { + assert(Triple.getArch() == llvm::Triple::spirv64 && + "Invalid architecture for 64-bit SPIR-V."); PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 5e3686893719..5c4bd364b06a 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -239,9 +239,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512ER = 
true; } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; + HasFloat16 = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; - HasLegalHalfType = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { @@ -369,8 +369,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, .Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } - // Turn on _float16 for x86 (feature sse2) - HasFloat16 = SSELevel >= SSE2; // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. @@ -384,12 +382,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, SimdDefaultAlign = hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; - if (!HasX87) { - if (LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) - HasLongDouble = false; - if (getTriple().getArch() == llvm::Triple::x86) - HasFPReturn = false; - } + // FIXME: We should allow long double type on 32-bits to match with GCC. + // This requires backend to be able to lower f80 without x87 first. + if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) + HasLongDouble = false; return true; } diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 648c7b3df8ed..510f3911939c 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1034,8 +1034,9 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( if (!ThinLinkOS) return; } - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - CodeGenOpts.EnableSplitLTOUnit); + if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); PerModulePasses.add(createWriteThinLTOBitcodePass( *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); } else { @@ -1049,8 +1050,9 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( if (EmitLTOSummary) { if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - uint32_t(1)); + if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + uint32_t(1)); } PerModulePasses.add(createBitcodeWriterPass( @@ -1451,8 +1453,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!ThinLinkOS) return; } - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - CodeGenOpts.EnableSplitLTOUnit); + if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? 
&ThinLinkOS->os() : nullptr)); } else { @@ -1465,8 +1468,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (EmitLTOSummary) { if (!TheModule->getModuleFlag("ThinLTO")) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - uint32_t(1)); + if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + uint32_t(1)); } MPM.addPass( BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 326ca8d50533..b68e6328acdf 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -524,12 +524,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, llvm_unreachable("Already handled!"); case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2, FailureOrder, Size, Order, Scope); return; case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2, FailureOrder, Size, Order, Scope); return; @@ -565,6 +567,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, } case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: { llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr); @@ -576,6 +579,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: { llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1); @@ -586,6 +590,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, } case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: @@ -597,6 +602,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, : llvm::Instruction::Add; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd @@ -618,6 +624,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOpMinMax = true; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min @@ -628,6 +635,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOpMinMax = true; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: Op = E->getValueType()->isSignedIntegerType() ? 
llvm::AtomicRMWInst::Max @@ -638,6 +646,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::And; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: Op = llvm::AtomicRMWInst::And; @@ -647,6 +656,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Or; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: Op = llvm::AtomicRMWInst::Or; @@ -656,6 +666,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest, PostOp = llvm::Instruction::Xor; LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: Op = llvm::AtomicRMWInst::Xor; @@ -838,6 +849,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: break; @@ -857,7 +869,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__atomic_compare_exchange_n: case AtomicExpr::AO__atomic_compare_exchange: Val1 = EmitPointerWithAlignment(E->getVal1()); @@ -873,6 +887,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_sub: if (MemTy->isPointerType()) { @@ -901,7 +916,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__atomic_store_n: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__c11_atomic_fetch_and: @@ -916,8 +933,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_nand: case AtomicExpr::AO__atomic_and_fetch: case AtomicExpr::AO__atomic_or_fetch: @@ -926,7 +946,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_max_fetch: case AtomicExpr::AO__atomic_min_fetch: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: Val1 = EmitValToTemp(*this, E->getVal1()); 
break; } @@ -968,11 +990,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__atomic_fetch_nand: @@ -984,6 +1009,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_add_fetch: @@ -993,7 +1019,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__atomic_sub_fetch: case AtomicExpr::AO__atomic_xor_fetch: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__atomic_max_fetch: case AtomicExpr::AO__atomic_min_fetch: // For these, only library calls for certain sizes exist. @@ -1014,10 +1042,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_exchange: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_store_n: @@ -1079,7 +1112,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_compare_exchange_weak: case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_compare_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: LibCallName = "__atomic_compare_exchange"; @@ -1101,6 +1136,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: LibCallName = "__atomic_exchange"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), MemTy, E->getExprLoc(), TInfo.Width); @@ -1109,6 +1145,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // void __atomic_store_N(T *mem, T val, int order) case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: 
case AtomicExpr::AO__atomic_store_n: LibCallName = "__atomic_store"; @@ -1121,6 +1158,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // T __atomic_load_N(T *mem, int order) case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_load_n: LibCallName = "__atomic_load"; @@ -1133,6 +1171,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__opencl_atomic_fetch_add: case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: LibCallName = "__atomic_fetch_add"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), LoweredMemTy, E->getExprLoc(), TInfo.Width); @@ -1144,6 +1183,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_and: case AtomicExpr::AO__atomic_fetch_and: LibCallName = "__atomic_fetch_and"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -1156,6 +1196,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: case AtomicExpr::AO__atomic_fetch_or: LibCallName = "__atomic_fetch_or"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -1180,6 +1221,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_xor: case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__atomic_fetch_xor: LibCallName = "__atomic_fetch_xor"; AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(), @@ -1190,6 +1232,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_min: case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: case AtomicExpr::AO__opencl_atomic_fetch_min: LibCallName = E->getValueType()->isSignedIntegerType() ? "__atomic_fetch_min" @@ -1202,6 +1245,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_fetch_max: case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__opencl_atomic_fetch_max: LibCallName = E->getValueType()->isSignedIntegerType() ? 
"__atomic_fetch_max" @@ -1291,10 +1335,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store || E->getOp() == AtomicExpr::AO__opencl_atomic_store || + E->getOp() == AtomicExpr::AO__hip_atomic_store || E->getOp() == AtomicExpr::AO__atomic_store || E->getOp() == AtomicExpr::AO__atomic_store_n; bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load || E->getOp() == AtomicExpr::AO__opencl_atomic_load || + E->getOp() == AtomicExpr::AO__hip_atomic_load || E->getOp() == AtomicExpr::AO__atomic_load || E->getOp() == AtomicExpr::AO__atomic_load_n; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 849423c8b9ba..5d6df59cc405 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -170,8 +170,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { // Convert the type of the pointer to a pointer to the stored type. Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); + unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace(); Value *BC = CGF.Builder.CreateBitCast( - Address, llvm::PointerType::getUnqual(Val->getType()), "cast"); + Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast"); LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType()); LV.setNontemporal(true); CGF.EmitStoreOfScalar(Val, LV, false); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 4f14459e4d28..f6853a22cd36 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4510,6 +4510,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + if (auto *DI = CGF.getDebugInfo()) + DI->EmitDeclareOfAutoVariable(Pair.first, Pair.second.getPointer(), + CGF.Builder, /*UsePointerValue*/ true); } // Adjust mapping for internal locals by mapping actual memory instead of // a pointer to this memory. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 59f3e0270571..9ba1a5c25e81 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -710,10 +710,25 @@ void CodeGenModule::Release() { 1); } - if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 || + // Add module metadata for return address signing (ignoring + // non-leaf/all) and stack tagging. These are actually turned on by function + // attributes, but we use module metadata to emit build attributes. This is + // needed for LTO, where the function attributes are inside bitcode + // serialised into a global variable by the time build attributes are + // emitted, so we can't access them. 
+ if (Context.getTargetInfo().hasFeature("ptrauth") && + LangOpts.getSignReturnAddressScope() != + LangOptions::SignReturnAddressScopeKind::None) + getModule().addModuleFlag(llvm::Module::Override, + "sign-return-address-buildattr", 1); + if (LangOpts.Sanitize.has(SanitizerKind::MemTag)) + getModule().addModuleFlag(llvm::Module::Override, + "tag-stack-memory-buildattr", 1); + + if (Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb || + Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 || Arch == llvm::Triple::aarch64_be) { - getModule().addModuleFlag(llvm::Module::Error, - "branch-target-enforcement", + getModule().addModuleFlag(llvm::Module::Error, "branch-target-enforcement", LangOpts.BranchTargetEnforcement); getModule().addModuleFlag(llvm::Module::Error, "sign-return-address", @@ -722,9 +737,11 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all", LangOpts.isSignReturnAddressScopeAll()); - getModule().addModuleFlag(llvm::Module::Error, - "sign-return-address-with-bkey", - !LangOpts.isSignReturnAddressWithAKey()); + if (Arch != llvm::Triple::thumb && Arch != llvm::Triple::thumbeb) { + getModule().addModuleFlag(llvm::Module::Error, + "sign-return-address-with-bkey", + !LangOpts.isSignReturnAddressWithAKey()); + } } if (!CodeGenOpts.MemoryProfileOutput.empty()) { @@ -1266,6 +1283,20 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD, (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } +static void AppendTargetClonesMangling(const CodeGenModule &CGM, + const TargetClonesAttr *Attr, + unsigned VersionIndex, + raw_ostream &Out) { + Out << '.'; + StringRef FeatureStr = Attr->getFeatureStr(VersionIndex); + if (FeatureStr.startswith("arch=")) + Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1); + else + Out << FeatureStr; + + Out << '.' << Attr->getMangledIndex(VersionIndex); +} + static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { @@ -1319,6 +1350,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); break; + case MultiVersionKind::TargetClones: + AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(), + GD.getMultiVersionIndex(), Out); + break; case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } @@ -1983,8 +2018,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; + const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr; bool AddedAttr = false; - if (TD || SD) { + if (TD || SD || TC) { llvm::StringMap<bool> FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); @@ -3226,6 +3262,12 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, for (unsigned I = 0; I < Spec->cpus_size(); ++I) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); // Requires multiple emits. 
+ } else if (FD->isTargetClonesMultiVersion()) { + auto *Clone = FD->getAttr<TargetClonesAttr>(); + for (unsigned I = 0; I < Clone->featuresStrs_size(); ++I) + if (Clone->isFirstOfVersion(I)) + EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); + EmitTargetClonesResolver(GD); } else EmitGlobalFunctionDefinition(GD, GV); } @@ -3307,6 +3349,63 @@ llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, return llvm::GlobalValue::WeakODRLinkage; } +void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + assert(FD && "Not a FunctionDecl?"); + const auto *TC = FD->getAttr<TargetClonesAttr>(); + assert(TC && "Not a target_clones Function?"); + + QualType CanonTy = Context.getCanonicalType(FD->getType()); + llvm::Type *DeclTy = getTypes().ConvertType(CanonTy); + + if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) { + const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); + DeclTy = getTypes().GetFunctionType(FInfo); + } + + llvm::Function *ResolverFunc; + if (getTarget().supportsIFunc()) { + auto *IFunc = cast<llvm::GlobalIFunc>( + GetOrCreateMultiVersionResolver(GD, DeclTy, FD)); + ResolverFunc = cast<llvm::Function>(IFunc->getResolver()); + } else + ResolverFunc = + cast<llvm::Function>(GetOrCreateMultiVersionResolver(GD, DeclTy, FD)); + + SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; + for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size(); + ++VersionIndex) { + if (!TC->isFirstOfVersion(VersionIndex)) + continue; + StringRef Version = TC->getFeatureStr(VersionIndex); + StringRef MangledName = + getMangledName(GD.getWithMultiVersionIndex(VersionIndex)); + llvm::Constant *Func = GetGlobalValue(MangledName); + assert(Func && + "Should have already been created before calling resolver emit"); + + StringRef Architecture; + llvm::SmallVector<StringRef, 1> Feature; + + if (Version.startswith("arch=")) + Architecture = Version.drop_front(sizeof("arch=") - 1); + else if (Version != "default") + Feature.push_back(Version); + + Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature); + } + + const TargetInfo &TI = getTarget(); + std::stable_sort( + Options.begin(), Options.end(), + [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); + }); + CodeGenFunction CGF(*this); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); +} + void CodeGenModule::emitMultiVersionFunctions() { std::vector<GlobalDecl> MVFuncsToEmit; MultiVersionFuncs.swap(MVFuncsToEmit); @@ -3511,8 +3610,25 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). - if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) + if (FD->isTargetMultiVersion()) MultiVersionFuncs.push_back(GD); + else if (FD->isTargetClonesMultiVersion()) { + // In target_clones multiversioning, make sure we emit this if used. 
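Putting the target_clones pieces together: AppendTargetClonesMangling gives each emitted variant a ".<feature>.<index>" suffix (with "arch=" spelled as "arch_"), and EmitTargetClonesResolver emits a resolver, exposed as an ifunc where the target supports one, that selects the best variant. An illustrative use; the concrete symbol names follow the mangling scheme above and are indicative rather than observed output:

extern "C" __attribute__((target_clones("avx2", "arch=atom", "default")))
int sum(const int *v, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += v[i];
  return s;
}

// Roughly expected artifacts:
//   sum.avx2.0, sum.arch_atom.1, sum.default.2   - the cloned bodies
//   a resolver function plus an ifunc named sum on ifunc-capable targets
// Callers keep calling plain sum(); dispatch happens through the resolver.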
+ auto DDI = + DeferredDecls.find(getMangledName(GD.getWithMultiVersionIndex(0))); + if (DDI != DeferredDecls.end()) { + addDeferredDeclToEmit(GD); + DeferredDecls.erase(DDI); + } else { + // Emit the symbol of the 1st variant, so that the deferred decls know we + // need it, otherwise the only global value will be the resolver/ifunc, + // which end up getting broken if we search for them with GetGlobalValue'. + GetOrCreateLLVMFunction( + getMangledName(GD.getWithMultiVersionIndex(0)), DeclTy, FD, + /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + } + } if (getTarget().supportsIFunc()) { llvm::Type *ResolverType = llvm::FunctionType::get( diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index fbed22376c82..e1c7f486d334 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1500,6 +1500,7 @@ private: void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); void emitCPUDispatchDefinition(GlobalDecl GD); + void EmitTargetClonesResolver(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 302dc653c46e..36e0319c8ab9 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -6364,6 +6364,26 @@ public: const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; + auto *Fn = cast<llvm::Function>(GV); + + if (const auto *TA = FD->getAttr<TargetAttr>()) { + ParsedTargetAttr Attr = TA->parse(); + if (!Attr.BranchProtection.empty()) { + TargetInfo::BranchProtectionInfo BPI; + StringRef DiagMsg; + (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection, + BPI, DiagMsg); + + static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"}; + assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 && + "Unexpected SignReturnAddressScopeKind"); + Fn->addFnAttr("sign-return-address", + SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]); + + Fn->addFnAttr("branch-target-enforcement", + BPI.BranchTargetEnforcement ? 
"true" : "false"); + } + } const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>(); if (!Attr) @@ -6379,8 +6399,6 @@ public: case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } - llvm::Function *Fn = cast<llvm::Function>(GV); - Fn->addFnAttr("interrupt", Kind); ARMABIInfo::ABIKind ABI = cast<ARMABIInfo>(getABIInfo()).getABIKind(); @@ -9339,17 +9357,25 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, llvm::LLVMContext &Ctx) const { std::string Name; switch (Scope) { + case SyncScope::HIPSingleThread: + Name = "singlethread"; + break; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + Name = "wavefront"; + break; + case SyncScope::HIPWorkgroup: case SyncScope::OpenCLWorkGroup: Name = "workgroup"; break; + case SyncScope::HIPAgent: case SyncScope::OpenCLDevice: Name = "agent"; break; + case SyncScope::HIPSystem: case SyncScope::OpenCLAllSVMDevices: Name = ""; break; - case SyncScope::OpenCLSubGroup: - Name = "wavefront"; } if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8023d03013a1..d501bd026219 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -38,6 +38,7 @@ #include "ToolChains/NaCl.h" #include "ToolChains/NetBSD.h" #include "ToolChains/OpenBSD.h" +#include "ToolChains/PPCFreeBSD.h" #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" #include "ToolChains/RISCVToolchain.h" @@ -5302,7 +5303,11 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, TC = std::make_unique<toolchains::NetBSD>(*this, Target, Args); break; case llvm::Triple::FreeBSD: - TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args); + if (Target.isPPC()) + TC = std::make_unique<toolchains::PPCFreeBSDToolChain>(*this, Target, + Args); + else + TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args); break; case llvm::Triple::Minix: TC = std::make_unique<toolchains::Minix>(*this, Target, Args); diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 0b60d097b9ca..abc32f22d2a1 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -225,7 +225,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, bool success = true; // Enable NEON by default. Features.push_back("+neon"); - llvm::StringRef WaMArch = ""; + llvm::StringRef WaMArch; if (ForAS) for (const auto *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) @@ -235,7 +235,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, // Call getAArch64ArchFeaturesFromMarch only if "-Wa,-march=" or // "-Xassembler -march" is detected. Otherwise it may return false // and causes Clang to error out. - if (WaMArch.size()) + if (!WaMArch.empty()) success = getAArch64ArchFeaturesFromMarch(D, WaMArch, Args, Features); else if ((A = Args.getLastArg(options::OPT_march_EQ))) success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features); @@ -259,8 +259,15 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, success = getAArch64MicroArchFeaturesFromMcpu( D, getAArch64TargetCPU(Args, Triple, A), Args, Features); - if (!success) - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); + if (!success) { + auto Diag = D.Diag(diag::err_drv_clang_unsupported); + // If "-Wa,-march=" is used, 'WaMArch' will contain the argument's value, + // while 'A' is uninitialized. Only dereference 'A' in the other case. 
+ if (!WaMArch.empty()) + Diag << "-march=" + WaMArch.str(); + else + Diag << A->getAsString(Args); + } if (Args.getLastArg(options::OPT_mgeneral_regs_only)) { Features.push_back("-fp-armv8"); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 55518cd7926f..c5aaa067c4f5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -403,7 +403,7 @@ shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, } /// Adds exception related arguments to the driver command arguments. There's a -/// master flag, -fexceptions and also language specific flags to enable/disable +/// main flag, -fexceptions and also language specific flags to enable/disable /// C++ and Objective-C exceptions. This makes it possible to for example /// disable C++ exceptions but enable Objective-C exceptions. static bool addExceptionArgs(const ArgList &Args, types::ID InputType, @@ -1603,6 +1603,49 @@ void RenderARMABI(const Driver &D, const llvm::Triple &Triple, } } +static void CollectARMPACBTIOptions(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs, bool isAArch64) { + const Arg *A = isAArch64 + ? Args.getLastArg(options::OPT_msign_return_address_EQ, + options::OPT_mbranch_protection_EQ) + : Args.getLastArg(options::OPT_mbranch_protection_EQ); + if (!A) + return; + + StringRef Scope, Key; + bool IndirectBranches; + + if (A->getOption().matches(options::OPT_msign_return_address_EQ)) { + Scope = A->getValue(); + if (!Scope.equals("none") && !Scope.equals("non-leaf") && + !Scope.equals("all")) + D.Diag(diag::err_invalid_branch_protection) + << Scope << A->getAsString(Args); + Key = "a_key"; + IndirectBranches = false; + } else { + StringRef DiagMsg; + llvm::ARM::ParsedBranchProtection PBP; + if (!llvm::ARM::parseBranchProtection(A->getValue(), PBP, DiagMsg)) + D.Diag(diag::err_invalid_branch_protection) + << DiagMsg << A->getAsString(Args); + if (!isAArch64 && PBP.Key == "b_key") + D.Diag(diag::warn_unsupported_branch_protection) + << "b-key" << A->getAsString(Args); + Scope = PBP.Scope; + Key = PBP.Key; + IndirectBranches = PBP.BranchTargetEnforcement; + } + + CmdArgs.push_back( + Args.MakeArgString(Twine("-msign-return-address=") + Scope)); + if (!Scope.equals("none")) + CmdArgs.push_back( + Args.MakeArgString(Twine("-msign-return-address-key=") + Key)); + if (IndirectBranches) + CmdArgs.push_back("-mbranch-target-enforce"); +} + void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) const { RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs); @@ -1644,6 +1687,10 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args, CmdArgs.push_back("-mcmse"); AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); + + // Enable/disable return address signing and indirect branch targets. + CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs, + false /*isAArch64*/); } void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, @@ -1783,40 +1830,8 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, } // Enable/disable return address signing and indirect branch targets. 
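To make the shared driver helper concrete: an invocation such as

  clang --target=aarch64-linux-gnu -mbranch-protection=pac-ret+b-key+bti -c f.c

is expected to reach cc1 as

  -msign-return-address=non-leaf -msign-return-address-key=b_key -mbranch-target-enforce

with the same translation now applied on 32-bit Arm targets, where a b_key request additionally draws the "unsupported" warning. The -mbranch-protection value spellings here are the usual ones and are assumed, not quoted from this hunk.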
- if (Arg *A = Args.getLastArg(options::OPT_msign_return_address_EQ, - options::OPT_mbranch_protection_EQ)) { - - const Driver &D = getToolChain().getDriver(); - - StringRef Scope, Key; - bool IndirectBranches; - - if (A->getOption().matches(options::OPT_msign_return_address_EQ)) { - Scope = A->getValue(); - if (!Scope.equals("none") && !Scope.equals("non-leaf") && - !Scope.equals("all")) - D.Diag(diag::err_invalid_branch_protection) - << Scope << A->getAsString(Args); - Key = "a_key"; - IndirectBranches = false; - } else { - StringRef Err; - llvm::AArch64::ParsedBranchProtection PBP; - if (!llvm::AArch64::parseBranchProtection(A->getValue(), PBP, Err)) - D.Diag(diag::err_invalid_branch_protection) - << Err << A->getAsString(Args); - Scope = PBP.Scope; - Key = PBP.Key; - IndirectBranches = PBP.BranchTargetEnforcement; - } - - CmdArgs.push_back( - Args.MakeArgString(Twine("-msign-return-address=") + Scope)); - CmdArgs.push_back( - Args.MakeArgString(Twine("-msign-return-address-key=") + Key)); - if (IndirectBranches) - CmdArgs.push_back("-mbranch-target-enforce"); - } + CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs, + true /*isAArch64*/); // Handle -msve_vector_bits=<bits> if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) { @@ -5821,9 +5836,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var, options::OPT_fno_visibility_inlines_hidden_static_local_var); Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden); - Args.AddLastArg(CmdArgs, options::OPT_fnew_infallible); Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); + if (Args.hasFlag(options::OPT_fnew_infallible, + options::OPT_fno_new_infallible, false)) + CmdArgs.push_back("-fnew-infallible"); + if (Args.hasFlag(options::OPT_fno_operator_names, options::OPT_foperator_names, false)) CmdArgs.push_back("-fno-operator-names"); @@ -5886,7 +5904,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // runtime. if (Args.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, - /*Default=*/false)) + /*Default=*/!getToolChain().getTriple().isAMDGCN())) CmdArgs.push_back("-fopenmp-target-new-runtime"); // When in OpenMP offloading mode, enable debugging on the device. @@ -6659,6 +6677,35 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, A->claim(); } + // Turn -fplugin-arg-pluginname-key=value into + // -plugin-arg-pluginname key=value + // GCC has an actual plugin_argument struct with key/value pairs that it + // passes to its plugins, but we don't, so just pass it on as-is. + // + // The syntax for -fplugin-arg- is ambiguous if both plugin name and + // argument key are allowed to contain dashes. GCC therefore only + // allows dashes in the key. We do the same. 
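As a concrete instance of the plugin-argument forwarding described above (plugin and key names are made up for illustration):

  -fplugin-arg-find_bugs-verbosity=3        driver spelling
  -plugin-arg-find_bugs verbosity=3         what cc1 receives

Everything up to the first dash after the prefix is taken as the plugin name, so argument keys may contain dashes but plugin names may not.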
+ for (const Arg *A : Args.filtered(options::OPT_fplugin_arg)) { + auto ArgValue = StringRef(A->getValue()); + auto FirstDashIndex = ArgValue.find('-'); + StringRef PluginName = ArgValue.substr(0, FirstDashIndex); + StringRef Arg = ArgValue.substr(FirstDashIndex + 1); + + A->claim(); + if (FirstDashIndex == StringRef::npos || Arg.empty()) { + if (PluginName.empty()) { + D.Diag(diag::warn_drv_missing_plugin_name) << A->getAsString(Args); + } else { + D.Diag(diag::warn_drv_missing_plugin_arg) + << PluginName << A->getAsString(Args); + } + continue; + } + + CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-arg-") + PluginName)); + CmdArgs.push_back(Args.MakeArgString(Arg)); + } + // Forward -fpass-plugin=name.so to -cc1. for (const Arg *A : Args.filtered(options::OPT_fpass_plugin_EQ)) { CmdArgs.push_back( diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 5397c7a9a0e6..ee573b89bed1 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -745,7 +745,7 @@ void CudaToolChain::addClangTargetOptions( std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, - options::OPT_fno_openmp_target_new_runtime, false)) + options::OPT_fno_openmp_target_new_runtime, true)) BitcodeSuffix = "new-nvptx-" + GpuArch.str(); else BitcodeSuffix = "nvptx-" + GpuArch.str(); diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index b82c5d7600df..c169e3d45793 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -32,7 +32,8 @@ void Flang::AddFortranDialectOptions(const ArgList &Args, options::OPT_fxor_operator, options::OPT_fno_xor_operator, options::OPT_falternative_parameter_statement, options::OPT_fdefault_real_8, options::OPT_fdefault_integer_8, - options::OPT_fdefault_double_8, options::OPT_flarge_sizes}); + options::OPT_fdefault_double_8, options::OPT_flarge_sizes, + options::OPT_fno_automatic}); } void Flang::AddPreprocessingOptions(const ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index dc05f9893465..d08ea282f6df 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -391,7 +391,8 @@ FreeBSD::FreeBSD(const Driver &D, const llvm::Triple &Triple, } ToolChain::CXXStdlibType FreeBSD::GetDefaultCXXStdlibType() const { - if (getTriple().getOSMajorVersion() >= 10) + unsigned Major = getTriple().getOSMajorVersion(); + if (Major >= 10 || Major == 0) return ToolChain::CST_Libcxx; return ToolChain::CST_Libstdcxx; } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 0224383e63a1..198774506e5e 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -421,6 +421,9 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { (Triple.getEnvironment() == llvm::Triple::MuslEABIHF || tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard)) ArchName += "hf"; + if (Arch == llvm::Triple::ppc && + Triple.getSubArch() == llvm::Triple::PPCSubArch_spe) + ArchName = "powerpc-sf"; return "/lib/ld-musl-" + ArchName + ".so.1"; } diff --git a/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp new file mode 100644 index 000000000000..8d381c4f1437 --- /dev/null +++ b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp @@ -0,0 +1,28 @@ +//===-- PPCFreeBSD.cpp - PowerPC ToolChain Implementations 
------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PPCFreeBSD.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/Options.h" +#include "llvm/Support/Path.h" + +using namespace clang::driver::toolchains; +using namespace llvm::opt; + +void PPCFreeBSDToolChain::AddClangSystemIncludeArgs( + const ArgList &DriverArgs, ArgStringList &CC1Args) const { + if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) && + !DriverArgs.hasArg(options::OPT_nobuiltininc)) { + const Driver &D = getDriver(); + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include", "ppc_wrappers"); + addSystemInclude(DriverArgs, CC1Args, P); + } + + FreeBSD::AddClangSystemIncludeArgs(DriverArgs, CC1Args); +} diff --git a/clang/lib/Driver/ToolChains/PPCFreeBSD.h b/clang/lib/Driver/ToolChains/PPCFreeBSD.h new file mode 100644 index 000000000000..d5d9cf4e83a0 --- /dev/null +++ b/clang/lib/Driver/ToolChains/PPCFreeBSD.h @@ -0,0 +1,33 @@ +//===--- PPCFreeBSD.h - PowerPC ToolChain Implementations -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H + +#include "FreeBSD.h" + +namespace clang { +namespace driver { +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY PPCFreeBSDToolChain : public FreeBSD { +public: + PPCFreeBSDToolChain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) + : FreeBSD(D, Triple, Args) {} + + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; +}; + +} // end namespace toolchains +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H diff --git a/clang/lib/Driver/ToolChains/PS4CPU.h b/clang/lib/Driver/ToolChains/PS4CPU.h index 82f9523f84fb..4bedabaf267c 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.h +++ b/clang/lib/Driver/ToolChains/PS4CPU.h @@ -80,6 +80,7 @@ public: return LangOptions::SSPStrong; } + unsigned GetDefaultDwarfVersion() const override { return 4; } llvm::DebuggerKind getDefaultDebuggerTuning() const override { return llvm::DebuggerKind::SCE; } diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 1e4f5690ef24..5073f5105d05 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1984,9 +1984,17 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, } else if (Current.is(TT_LineComment) && (Current.Previous == nullptr || Current.Previous->isNot(TT_ImplicitStringLiteral))) { + bool RegularComments = [&]() { + for (const FormatToken *T = &Current; T && T->is(TT_LineComment); + T = T->Next) { + if (!(T->TokenText.startswith("//") || T->TokenText.startswith("#"))) + return false; + } + return true; + }(); if (!Style.ReflowComments || CommentPragmasRegex.match(Current.TokenText.substr(2)) || - switchesFormatting(Current)) + 
switchesFormatting(Current) || !RegularComments) return nullptr; return std::make_unique<BreakableLineCommentSection>( Current, StartColumn, /*InPPDirective=*/false, Encoding, Style); @@ -2195,11 +2203,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, // When breaking before a tab character, it may be moved by a few columns, // but will still be expanded to the next tab stop, so we don't save any // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) { + if (NewRemainingTokenColumns >= RemainingTokenColumns) { // FIXME: Do we need to adjust the penalty? break; } - assert(NewRemainingTokenColumns < RemainingTokenColumns); LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first << ", " << Split.second << "\n"); diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 8ae29c54a762..17de1075aeaa 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2988,9 +2988,8 @@ reformat(const FormatStyle &Style, StringRef Code, // JSON only needs the formatting passing. if (Style.isJson()) { std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size())); - auto Env = - Environment::make(Code, FileName, Ranges, FirstStartColumn, - NextStartColumn, LastStartColumn); + auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); if (!Env) return {}; // Perform the actual formatting pass. @@ -3118,9 +3117,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, auto Env = Environment::make(Code, FileName, Ranges); if (!Env) return {}; - return NamespaceEndCommentsFixer(*Env, Style) - .process() - .first; + return NamespaceEndCommentsFixer(*Env, Style).process().first; } tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, @@ -3130,9 +3127,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, auto Env = Environment::make(Code, FileName, Ranges); if (!Env) return {}; - return UsingDeclarationsSorter(*Env, Style) - .process() - .first; + return UsingDeclarationsSorter(*Env, Style).process().first; } LangOptions getFormattingLangOpts(const FormatStyle &Style) { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 06d51dd95f50..1a2858018fde 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -76,6 +76,7 @@ namespace format { TYPE(LineComment) \ TYPE(MacroBlockBegin) \ TYPE(MacroBlockEnd) \ + TYPE(ModulePartitionColon) \ TYPE(NamespaceMacro) \ TYPE(NonNullAssertion) \ TYPE(NullCoalescingEqual) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 8075756cca03..64fbd2d5d45b 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -37,27 +37,40 @@ FormatTokenLexer::FormatTokenLexer( getFormattingLangOpts(Style))); Lex->SetKeepWhitespaceMode(true); - for (const std::string &ForEachMacro : Style.ForEachMacros) - Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); - for (const std::string &IfMacro : Style.IfMacros) - Macros.insert({&IdentTable.get(IfMacro), TT_IfMacro}); - for (const std::string &AttributeMacro : Style.AttributeMacros) - Macros.insert({&IdentTable.get(AttributeMacro), TT_AttributeMacro}); - for (const std::string &StatementMacro : Style.StatementMacros) - Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro}); - for (const std::string &TypenameMacro : Style.TypenameMacros) - Macros.insert({&IdentTable.get(TypenameMacro), 
TT_TypenameMacro}); - for (const std::string &NamespaceMacro : Style.NamespaceMacros) - Macros.insert({&IdentTable.get(NamespaceMacro), TT_NamespaceMacro}); + for (const std::string &ForEachMacro : Style.ForEachMacros) { + auto Identifier = &IdentTable.get(ForEachMacro); + Macros.insert({Identifier, TT_ForEachMacro}); + } + for (const std::string &IfMacro : Style.IfMacros) { + auto Identifier = &IdentTable.get(IfMacro); + Macros.insert({Identifier, TT_IfMacro}); + } + for (const std::string &AttributeMacro : Style.AttributeMacros) { + auto Identifier = &IdentTable.get(AttributeMacro); + Macros.insert({Identifier, TT_AttributeMacro}); + } + for (const std::string &StatementMacro : Style.StatementMacros) { + auto Identifier = &IdentTable.get(StatementMacro); + Macros.insert({Identifier, TT_StatementMacro}); + } + for (const std::string &TypenameMacro : Style.TypenameMacros) { + auto Identifier = &IdentTable.get(TypenameMacro); + Macros.insert({Identifier, TT_TypenameMacro}); + } + for (const std::string &NamespaceMacro : Style.NamespaceMacros) { + auto Identifier = &IdentTable.get(NamespaceMacro); + Macros.insert({Identifier, TT_NamespaceMacro}); + } for (const std::string &WhitespaceSensitiveMacro : Style.WhitespaceSensitiveMacros) { - Macros.insert( - {&IdentTable.get(WhitespaceSensitiveMacro), TT_UntouchableMacroFunc}); + auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro); + Macros.insert({Identifier, TT_UntouchableMacroFunc}); } for (const std::string &StatementAttributeLikeMacro : - Style.StatementAttributeLikeMacros) - Macros.insert({&IdentTable.get(StatementAttributeLikeMacro), - TT_StatementAttributeLikeMacro}); + Style.StatementAttributeLikeMacros) { + auto Identifier = &IdentTable.get(StatementAttributeLikeMacro); + Macros.insert({Identifier, TT_StatementAttributeLikeMacro}); + } } ArrayRef<FormatToken *> FormatTokenLexer::lex() { @@ -739,6 +752,8 @@ bool FormatTokenLexer::tryMerge_TMacro() { Tokens.pop_back(); Tokens.pop_back(); Tokens.back() = String; + if (FirstInLineIndex >= Tokens.size()) + FirstInLineIndex = Tokens.size() - 1; return true; } diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index 515cfce725a4..77dc0d683e5f 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -553,9 +553,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, auto Env = Environment::make(Code, FileName, Ranges); if (!Env) return {}; - return JavaScriptImportSorter(*Env, Style) - .process() - .first; + return JavaScriptImportSorter(*Env, Style).process().first; } } // end namespace format diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp index a619c6d939e9..d83e837ca134 100644 --- a/clang/lib/Format/TokenAnalyzer.cpp +++ b/clang/lib/Format/TokenAnalyzer.cpp @@ -37,7 +37,7 @@ namespace format { // FIXME: Instead of printing the diagnostic we should store it and have a // better way to return errors through the format APIs. -class FatalDiagnosticConsumer: public DiagnosticConsumer { +class FatalDiagnosticConsumer : public DiagnosticConsumer { public: void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) override { @@ -71,7 +71,8 @@ Environment::make(StringRef Code, StringRef FileName, } // Validate that we can get the buffer data without a fatal error. 
Env->SM.getBufferData(Env->ID); - if (Diags.fatalError()) return nullptr; + if (Diags.fatalError()) + return nullptr; return Env; } @@ -80,8 +81,7 @@ Environment::Environment(StringRef Code, StringRef FileName, unsigned LastStartColumn) : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), - NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) { -} + NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {} TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) : Style(Style), Env(Env), diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3897241cb858..a94d8cdc3b04 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -314,10 +314,11 @@ private: // // void (*FunctionPointer)(void); // void (&FunctionReference)(void); + // void (&&FunctionReference)(void); // void (^ObjCBlock)(void); bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression; bool ProbablyFunctionType = - CurrentToken->isOneOf(tok::star, tok::amp, tok::caret); + CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret); bool HasMultipleLines = false; bool HasMultipleParametersOnALine = false; bool MightBeObjCForRangeLoop = @@ -902,9 +903,13 @@ private: break; } } - if (Contexts.back().ColonIsDictLiteral || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { + if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) || + Line.First->startsSequence(tok::kw_export, Keywords.kw_module) || + Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) { + Tok->setType(TT_ModulePartitionColon); + } else if (Contexts.back().ColonIsDictLiteral || + Style.Language == FormatStyle::LK_Proto || + Style.Language == FormatStyle::LK_TextProto) { Tok->setType(TT_DictLiteral); if (Style.Language == FormatStyle::LK_TextProto) { if (FormatToken *Previous = Tok->getPreviousNonComment()) @@ -946,11 +951,15 @@ private: !Line.First->isOneOf(tok::kw_enum, tok::kw_case, tok::kw_default)) { FormatToken *Prev = Tok->getPreviousNonComment(); + if (!Prev) + break; if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); else if (Prev->is(tok::kw_try)) { // Member initializer list within function try block. FormatToken *PrevPrev = Prev->getPreviousNonComment(); + if (!PrevPrev) + break; if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept)) Tok->setType(TT_CtorInitializerColon); } else @@ -995,6 +1004,8 @@ private: if (CurrentToken && CurrentToken->is(Keywords.kw_await)) next(); } + if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await)) + next(); Contexts.back().ColonIsForRangeExpr = true; next(); if (!parseParens()) @@ -1578,6 +1589,8 @@ private: if (TemplateCloser->is(tok::l_paren)) { // No Matching Paren yet so skip to matching paren TemplateCloser = untilMatchingParen(TemplateCloser); + if (!TemplateCloser) + break; } if (TemplateCloser->is(tok::less)) NestingLevel++; @@ -2336,16 +2349,15 @@ void TokenAnnotator::setCommentLineLevels( if (NextNonCommentLine && CommentLine && NextNonCommentLine->First->NewlinesBefore <= 1 && NextNonCommentLine->First->OriginalColumn == - AL->First->OriginalColumn) { + AL->First->OriginalColumn) { // Align comments for preprocessor lines with the # in column 0 if // preprocessor lines are not indented. Otherwise, align with the next // line. 
- AL->Level = - (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && - (NextNonCommentLine->Type == LT_PreprocessorDirective || - NextNonCommentLine->Type == LT_ImportStatement)) - ? 0 - : NextNonCommentLine->Level; + AL->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && + (NextNonCommentLine->Type == LT_PreprocessorDirective || + NextNonCommentLine->Type == LT_ImportStatement)) + ? 0 + : NextNonCommentLine->Level; } else { NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr; } @@ -2639,8 +2651,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { if (Current->Role) Current->Role->precomputeFormattingInfos(Current); if (Current->MatchingParen && - Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { - assert(IndentLevel > 0); + Current->MatchingParen->opensBlockOrBlockTypeList(Style) && + IndentLevel > 0) { --IndentLevel; } Current->IndentLevel = IndentLevel; @@ -2942,6 +2954,14 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace)) return false; + // operator co_await(x) + if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous && + Left.Previous->is(tok::kw_operator)) + return false; + // co_await (x), co_yield (x), co_return (x) + if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) && + Right.isNot(tok::semi)) + return true; // requires clause Concept1<T> && Concept2<T> if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier)) return true; @@ -3159,9 +3179,13 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, if (Left.isIf(Line.Type != LT_PreprocessorDirective)) return Style.SpaceBeforeParensOptions.AfterControlStatements || spaceRequiredBeforeParens(Right); + + // TODO add Operator overloading specific Options to + // SpaceBeforeParensOptions + if (Right.is(TT_OverloadedOperatorLParen)) + return spaceRequiredBeforeParens(Right); // Function declaration or definition - if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName) || - Right.is(TT_OverloadedOperatorLParen))) { + if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) { if (Line.mightBeFunctionDefinition()) return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName || spaceRequiredBeforeParens(Right); @@ -3238,9 +3262,35 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, auto HasExistingWhitespace = [&Right]() { return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); }; + if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) return true; // Never ever merge two identifiers. + + // Leave a space between * and /* to avoid C4138 `comment end` found outside + // of comment. + if (Left.is(tok::star) && Right.is(tok::comment)) + return true; + if (Style.isCpp()) { + // Space between import <iostream>. + // or import .....; + if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis)) + return true; + // No space between module :. 
+ if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) && + Right.is(TT_ModulePartitionColon)) + return true; + // No space between import foo:bar but keep a space between import :bar; + if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon)) + return false; + // No space between :bar; + if (Left.is(TT_ModulePartitionColon) && + Right.isOneOf(tok::identifier, tok::kw_private)) + return false; + if (Left.is(tok::ellipsis) && Right.is(tok::identifier) && + Line.First->is(Keywords.kw_import)) + return false; + if (Left.is(tok::kw_operator)) return Right.is(tok::coloncolon); if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) && diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 299536cd806e..d099cfee9dea 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -320,9 +320,9 @@ private: } // Try to merge a control statement block with left brace wrapped if (I[1]->First->is(tok::l_brace) && - (TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for, - tok::kw_switch, tok::kw_try, tok::kw_do, - TT_ForEachMacro) || + (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, + tok::kw_for, tok::kw_switch, tok::kw_try, + tok::kw_do, TT_ForEachMacro) || (TheLine->First->is(tok::r_brace) && TheLine->First->Next && TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && Style.BraceWrapping.AfterControlStatement == @@ -335,7 +335,7 @@ private: ? 1 : 0; } else if (I[1]->First->is(tok::l_brace) && - TheLine->First->isOneOf(tok::kw_if, tok::kw_while, + TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, tok::kw_for)) { return (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) @@ -569,7 +569,7 @@ private: // Check that the current line allows merging. This depends on whether we // are in a control flow statements as well as several style flags. 
- if (Line.First->isOneOf(tok::kw_else, tok::kw_case) || + if (Line.First->is(tok::kw_case) || (Line.First->Next && Line.First->Next->is(tok::kw_else))) return 0; // default: in switch statement @@ -578,20 +578,21 @@ if (Tok && Tok->is(tok::colon)) return 0; } - if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, - tok::kw___try, tok::kw_catch, tok::kw___finally, - tok::kw_for, tok::r_brace, Keywords.kw___except)) { + if (Line.First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, tok::kw_do, + tok::kw_try, tok::kw___try, tok::kw_catch, + tok::kw___finally, tok::kw_for, tok::r_brace, + Keywords.kw___except)) { if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) return 0; // Don't merge when we can't except the case when // the control statement block is empty if (!Style.AllowShortIfStatementsOnASingleLine && - Line.startsWith(tok::kw_if) && + Line.First->isOneOf(tok::kw_if, tok::kw_else) && !Style.BraceWrapping.AfterControlStatement && !I[1]->First->is(tok::r_brace)) return 0; if (!Style.AllowShortIfStatementsOnASingleLine && - Line.startsWith(tok::kw_if) && + Line.First->isOneOf(tok::kw_if, tok::kw_else) && Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always && I + 2 != E && !I[2]->First->is(tok::r_brace)) return 0; @@ -676,7 +677,7 @@ // { <-- current Line // baz(); // } - if (Line.First == Line.Last && + if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) && Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_MultiLine) return 0; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 28d925858f77..5b9fe267aae6 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -28,9 +28,28 @@ namespace format { class FormatTokenSource { public: virtual ~FormatTokenSource() {} + + // Returns the next token in the token stream. virtual FormatToken *getNextToken() = 0; + // Returns the token preceding the token returned by the last call to + // getNextToken() in the token stream, or nullptr if no such token exists. + virtual FormatToken *getPreviousToken() = 0; + + // Returns the token that would be returned by the next call to + // getNextToken(). + virtual FormatToken *peekNextToken() = 0; + + // Returns whether we are at the end of the file. + // This can be different from whether getNextToken() returned an eof token + // when the FormatTokenSource is a view on a part of the token stream. + virtual bool isEOF() = 0; + + // Gets the current position in the token stream, to be used by setPosition(). virtual unsigned getPosition() = 0; + + // Resets the token stream to the state it was in when getPosition() returned + // Position, and returns the token at that position in the stream.
virtual FormatToken *setPosition(unsigned Position) = 0; }; @@ -108,6 +127,18 @@ public: return Token; } + FormatToken *getPreviousToken() override { + return PreviousTokenSource->getPreviousToken(); + } + + FormatToken *peekNextToken() override { + if (eof()) + return &FakeEOF; + return PreviousTokenSource->peekNextToken(); + } + + bool isEOF() override { return PreviousTokenSource->isEOF(); } + unsigned getPosition() override { return PreviousTokenSource->getPosition(); } FormatToken *setPosition(unsigned Position) override { @@ -199,16 +230,45 @@ public: : Tokens(Tokens), Position(-1) {} FormatToken *getNextToken() override { + if (Position >= 0 && Tokens[Position]->is(tok::eof)) { + LLVM_DEBUG({ + llvm::dbgs() << "Next "; + dbgToken(Position); + }); + return Tokens[Position]; + } ++Position; + LLVM_DEBUG({ + llvm::dbgs() << "Next "; + dbgToken(Position); + }); return Tokens[Position]; } + FormatToken *getPreviousToken() override { + assert(Position > 0); + return Tokens[Position - 1]; + } + + FormatToken *peekNextToken() override { + int Next = Position + 1; + LLVM_DEBUG({ + llvm::dbgs() << "Peeking "; + dbgToken(Next); + }); + return Tokens[Next]; + } + + bool isEOF() override { return Tokens[Position]->is(tok::eof); } + unsigned getPosition() override { + LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); assert(Position >= 0); return Position; } FormatToken *setPosition(unsigned P) override { + LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); Position = P; return Tokens[Position]; } @@ -216,6 +276,13 @@ public: void reset() { Position = -1; } private: + void dbgToken(int Position, llvm::StringRef Indent = "") { + FormatToken *Tok = Tokens[Position]; + llvm::dbgs() << Indent << "[" << Position + << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText + << ", Macro: " << !!Tok->MacroCtx << "\n"; + } + ArrayRef<FormatToken *> Tokens; int Position; }; @@ -399,7 +466,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { FormatToken *Next; do { Next = Tokens->getNextToken(); - } while (Next && Next->is(tok::comment)); + } while (Next->is(tok::comment)); FormatTok = Tokens->setPosition(StoredPosition); if (Next && Next->isNot(tok::colon)) { // default not followed by ':' is not a case label; treat it like @@ -875,10 +942,7 @@ void UnwrappedLineParser::parsePPEndIf() { parsePPUnknown(); // If the #endif of a potential include guard is the last thing in the file, // then we found an include guard. - unsigned TokenPosition = Tokens->getPosition(); - FormatToken *PeekNext = AllTokens[TokenPosition]; - if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && - PeekNext->is(tok::eof) && + if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && Style.IndentPPDirectives != FormatStyle::PPDIS_None) IncludeGuard = IG_Found; } @@ -1050,6 +1114,35 @@ static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); } +void UnwrappedLineParser::parseModuleImport() { + nextToken(); + while (!eof()) { + if (FormatTok->is(tok::colon)) { + FormatTok->setType(TT_ModulePartitionColon); + } + // Handle import <foo/bar.h> as we would an include statement. + else if (FormatTok->is(tok::less)) { + nextToken(); + while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { + // Mark tokens up to the trailing line comments as implicit string + // literals. 
+ if (FormatTok->isNot(tok::comment) && + !FormatTok->TokenText.startswith("//")) + FormatTok->setType(TT_ImplicitStringLiteral); + nextToken(); + } + } + if (FormatTok->is(tok::semi)) { + nextToken(); + break; + } + nextToken(); + } + + addUnwrappedLine(); + return; +} + // readTokenWithJavaScriptASI reads the next token and terminates the current // line if JavaScript Automatic Semicolon Insertion must // happen between the current token and the next token. @@ -1097,7 +1190,6 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() { } void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { - assert(!FormatTok->is(tok::l_brace)); if (Style.Language == FormatStyle::LK_TableGen && FormatTok->is(tok::pp_include)) { nextToken(); @@ -1249,6 +1341,10 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { addUnwrappedLine(); return; } + if (Style.isCpp()) { + parseModuleImport(); + return; + } } if (Style.isCpp() && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, @@ -1402,9 +1498,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { // declaration. if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) break; - const unsigned Position = Tokens->getPosition() + 1; - assert(Position < AllTokens.size()); - if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) { + if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) { addUnwrappedLine(); return; } @@ -1488,7 +1582,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { unsigned StoredPosition = Tokens->getPosition(); FormatToken *Next = Tokens->getNextToken(); FormatTok = Tokens->setPosition(StoredPosition); - if (Next && !mustBeJSIdent(Keywords, Next)) { + if (!mustBeJSIdent(Keywords, Next)) { nextToken(); break; } @@ -2099,8 +2193,8 @@ void UnwrappedLineParser::parseIfThenElse() { parseBlock(); addUnwrappedLine(); } else if (FormatTok->Tok.is(tok::kw_if)) { - FormatToken *Previous = AllTokens[Tokens->getPosition() - 1]; - bool PrecededByComment = Previous->is(tok::comment); + FormatToken *Previous = Tokens->getPreviousToken(); + bool PrecededByComment = Previous && Previous->is(tok::comment); if (PrecededByComment) { addUnwrappedLine(); ++Line->Level; @@ -2305,6 +2399,8 @@ void UnwrappedLineParser::parseForOrWhileLoop() { if (Style.Language == FormatStyle::LK_JavaScript && FormatTok->is(Keywords.kw_await)) nextToken(); + if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) + nextToken(); if (FormatTok->Tok.is(tok::l_paren)) parseParens(); if (FormatTok->Tok.is(tok::l_brace)) { @@ -2653,23 +2749,25 @@ bool UnwrappedLineParser::tryToParseSimpleAttribute() { ScopedTokenPosition AutoPosition(Tokens); FormatToken *Tok = Tokens->getNextToken(); // We already read the first [ check for the second. - if (Tok && !Tok->is(tok::l_square)) { + if (!Tok->is(tok::l_square)) { return false; } // Double check that the attribute is just something // fairly simple. 
- while (Tok) { + while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_square)) { break; } Tok = Tokens->getNextToken(); } + if (Tok->is(tok::eof)) + return false; Tok = Tokens->getNextToken(); - if (Tok && !Tok->is(tok::r_square)) { + if (!Tok->is(tok::r_square)) { return false; } Tok = Tokens->getNextToken(); - if (Tok && Tok->is(tok::semi)) { + if (Tok->is(tok::semi)) { return false; } return true; @@ -2682,7 +2780,7 @@ void UnwrappedLineParser::parseJavaEnumBody() { unsigned StoredPosition = Tokens->getPosition(); bool IsSimple = true; FormatToken *Tok = Tokens->getNextToken(); - while (Tok) { + while (!Tok->is(tok::eof)) { if (Tok->is(tok::r_brace)) break; if (Tok->isOneOf(tok::l_brace, tok::semi)) { @@ -3292,6 +3390,20 @@ void UnwrappedLineParser::readToken(int LevelDifference) { do { FormatTok = Tokens->getNextToken(); assert(FormatTok); + while (FormatTok->getType() == TT_ConflictStart || + FormatTok->getType() == TT_ConflictEnd || + FormatTok->getType() == TT_ConflictAlternative) { + if (FormatTok->getType() == TT_ConflictStart) { + conditionalCompilationStart(/*Unreachable=*/false); + } else if (FormatTok->getType() == TT_ConflictAlternative) { + conditionalCompilationAlternative(); + } else if (FormatTok->getType() == TT_ConflictEnd) { + conditionalCompilationEnd(); + } + FormatTok = Tokens->getNextToken(); + FormatTok->MustBreakBefore = true; + } + while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { distributeComments(Comments, FormatTok); @@ -3313,19 +3425,6 @@ void UnwrappedLineParser::readToken(int LevelDifference) { flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); } - while (FormatTok->getType() == TT_ConflictStart || - FormatTok->getType() == TT_ConflictEnd || - FormatTok->getType() == TT_ConflictAlternative) { - if (FormatTok->getType() == TT_ConflictStart) { - conditionalCompilationStart(/*Unreachable=*/false); - } else if (FormatTok->getType() == TT_ConflictAlternative) { - conditionalCompilationAlternative(); - } else if (FormatTok->getType() == TT_ConflictEnd) { - conditionalCompilationEnd(); - } - FormatTok = Tokens->getNextToken(); - FormatTok->MustBreakBefore = true; - } if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && !Line->InPPDirective) { diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index bcae0f3ad258..b4c082654597 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -110,6 +110,7 @@ private: void parseCaseLabel(); void parseSwitch(); void parseNamespace(); + void parseModuleImport(); void parseNew(); void parseAccessSpecifier(); bool parseEnum(); diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index 74136d2f5caa..fae8a1c3fdc6 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -372,8 +372,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (ContinuedStringLiteral) Changes[i].Spaces += Shift; - assert(Shift >= 0); - Changes[i].StartOfTokenColumn += Shift; if (i + 1 != Changes.size()) Changes[i + 1].PreviousEndOfTokenColumn += Shift; @@ -915,7 +913,8 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, Changes[i].StartOfBlockComment->StartOfTokenColumn - Changes[i].StartOfTokenColumn; } - assert(Shift >= 0); + if (Shift < 0) + continue; Changes[i].Spaces += Shift; if (i + 1 != Changes.size()) Changes[i + 
1].PreviousEndOfTokenColumn += Shift; @@ -1270,10 +1269,10 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) { void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0) { - assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() != - C.OriginalWhitespaceRange.getBegin() && - "Generating two replacements for the same location"); + if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() == + C.OriginalWhitespaceRange.getBegin()) { + // Do not generate two replacements for the same location. + continue; } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 0ecb024fc6b9..0c153446142e 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -505,6 +505,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, if (LangOpts.HIP) { Builder.defineMacro("__HIP__"); Builder.defineMacro("__HIPCC__"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4"); + Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5"); if (LangOpts.CUDAIsDevice) Builder.defineMacro("__HIP_DEVICE_COMPILE__"); } diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index af82ab3f5558..8aa80a4c96fb 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -412,10 +412,13 @@ llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build( std::unique_ptr<PrecompilePreambleAction> Act; Act.reset(new PrecompilePreambleAction( StoreInMemory ? &Storage.asMemory().Data : nullptr, Callbacks)); - Callbacks.BeforeExecute(*Clang); if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0])) return BuildPreambleError::BeginSourceFileFailed; + // Performed after BeginSourceFile to ensure Clang->Preprocessor can be + // referenced in the callback. 
+ Callbacks.BeforeExecute(*Clang); + std::unique_ptr<PPCallbacks> DelegatedPPCallbacks = Callbacks.createPPCallbacks(); if (DelegatedPPCallbacks) diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 626ec4d71ccd..b4487f004715 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -633,7 +633,7 @@ static bool IsHeaderFile(const std::string &Filename) { return false; } - std::string Ext = std::string(Filename.begin()+DotPos+1, Filename.end()); + std::string Ext = Filename.substr(DotPos + 1); // C header: .h // C++ header: .hh or .H; return Ext == "h" || Ext == "hh" || Ext == "H"; diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index 0750d36b02ac..b2ecb42c43dd 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -569,7 +569,7 @@ static bool IsHeaderFile(const std::string &Filename) { return false; } - std::string Ext = std::string(Filename.begin()+DotPos+1, Filename.end()); + std::string Ext = Filename.substr(DotPos + 1); // C header: .h // C++ header: .hh or .H; return Ext == "h" || Ext == "hh" || Ext == "H"; diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index fb808d7b0a4f..55195b0781fb 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -19,6 +19,10 @@ #define __CR6_EQ_REV 1 #define __CR6_LT 2 #define __CR6_LT_REV 3 +#define __CR6_GT 4 +#define __CR6_GT_REV 5 +#define __CR6_SO 6 +#define __CR6_SO_REV 7 /* Constants for vec_test_data_class */ #define __VEC_CLASS_FP_SUBNORMAL_N (1 << 0) @@ -8413,9 +8417,20 @@ static __inline__ vector float __ATTRS_o_ai vec_round(vector float __a) { } #ifdef __VSX__ +#ifdef __XL_COMPAT_ALTIVEC__ +static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a); +static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { + double __fpscr = __builtin_readflm(); + __builtin_setrnd(0); + vector double __rounded = vec_rint(__a); + __builtin_setflm(__fpscr); + return __rounded; +} +#else static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) { return __builtin_vsx_xvrdpi(__a); } +#endif /* vec_rint */ @@ -19026,6 +19041,51 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { #endif /* __SIZEOF_INT128__ */ #endif /* __POWER10_VECTOR__ */ +#ifdef __POWER8_VECTOR__ +#define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps)) +#define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps)) + +static __inline__ long __bcdadd_ofl(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdadd_p(__CR6_SO, __a, __b); +} + +static __inline__ long __bcdsub_ofl(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __b); +} + +static __inline__ long __bcd_invalid(vector unsigned char __a) { + return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __a); +} + +static __inline__ long __bcdcmpeq(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_EQ, __a, __b); +} + +static __inline__ long __bcdcmplt(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_LT, __a, __b); +} + +static __inline__ long __bcdcmpgt(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_GT, __a, __b); +} + +static __inline__ long __bcdcmple(vector unsigned 
char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_GT_REV, __a, __b); +} + +static __inline__ long __bcdcmpge(vector unsigned char __a, + vector unsigned char __b) { + return __builtin_ppc_bcdsub_p(__CR6_LT_REV, __a, __b); +} + +#endif // __POWER8_VECTOR__ + #undef __ATTRS_o_ai #endif /* __ALTIVEC_H */ diff --git a/clang/lib/Headers/ppc_wrappers/emmintrin.h b/clang/lib/Headers/ppc_wrappers/emmintrin.h index 4dcb8485e2e9..82a71788b27a 100644 --- a/clang/lib/Headers/ppc_wrappers/emmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/emmintrin.h @@ -35,7 +35,7 @@ #ifndef EMMINTRIN_H_ #define EMMINTRIN_H_ -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) #include <altivec.h> @@ -2319,6 +2319,7 @@ _mm_castsi128_pd(__m128i __A) #else #include_next <emmintrin.h> -#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* EMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h index 24b14c8e07c0..86cf1a0f7618 100644 --- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h +++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h @@ -10,7 +10,7 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) #include <stdlib.h> diff --git a/clang/lib/Headers/ppc_wrappers/mmintrin.h b/clang/lib/Headers/ppc_wrappers/mmintrin.h index c55c44726f00..54e4ee9f4468 100644 --- a/clang/lib/Headers/ppc_wrappers/mmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/mmintrin.h @@ -35,7 +35,7 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) #include <altivec.h> /* The Intel API is flexible enough that we must allow aliasing with other @@ -1445,6 +1445,7 @@ extern __inline __m64 #else #include_next <mmintrin.h> -#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/clang/lib/Headers/ppc_wrappers/pmmintrin.h b/clang/lib/Headers/ppc_wrappers/pmmintrin.h index 6d93383d5412..8d4046bd43f1 100644 --- a/clang/lib/Headers/ppc_wrappers/pmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/pmmintrin.h @@ -38,7 +38,7 @@ #ifndef PMMINTRIN_H_ #define PMMINTRIN_H_ -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) /* We need definitions from the SSE2 and SSE header files*/ #include <emmintrin.h> @@ -145,6 +145,7 @@ _mm_lddqu_si128 (__m128i const *__P) #else #include_next <pmmintrin.h> -#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* PMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index f41264b27584..674703245a69 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -29,7 +29,7 @@ #ifndef SMMINTRIN_H_ #define SMMINTRIN_H_ -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) #include <altivec.h> #include <tmmintrin.h> @@ -104,6 +104,7 @@ extern __inline __m128i #else #include_next <smmintrin.h> 
-#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* _SMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/tmmintrin.h b/clang/lib/Headers/ppc_wrappers/tmmintrin.h index b5a935d5e47e..ebef7b8192d7 100644 --- a/clang/lib/Headers/ppc_wrappers/tmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/tmmintrin.h @@ -25,7 +25,7 @@ #ifndef TMMINTRIN_H_ #define TMMINTRIN_H_ -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) #include <altivec.h> @@ -490,6 +490,7 @@ _mm_mulhrs_pi16 (__m64 __A, __m64 __B) #else #include_next <tmmintrin.h> -#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* TMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index 0e45b96769f8..956603d36408 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -34,7 +34,7 @@ #ifndef _XMMINTRIN_H_INCLUDED #define _XMMINTRIN_H_INCLUDED -#if defined(__linux__) && defined(__ppc64__) +#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) /* Define four value permute mask */ #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) @@ -1838,6 +1838,7 @@ do { \ #else #include_next <xmmintrin.h> -#endif /* defined(__linux__) && defined(__ppc64__) */ +#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \ + */ #endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h index 665551ea69a4..1e47bcb2bacf 100644 --- a/clang/lib/Headers/stdatomic.h +++ b/clang/lib/Headers/stdatomic.h @@ -12,8 +12,12 @@ /* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for * example, already has a Clang-compatible stdatomic.h header. + * + * Exclude the MSVC path as well as the MSVC header as of the 14.31.30818 + * explicitly disallows `stdatomic.h` in the C mode via an `#error`. Fallback + * to the clang resource header until that is fully supported. */ -#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>) +#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>) && !defined(_MSC_VER) # include_next <stdatomic.h> #else diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index a0871062395e..1bdeccc4cbf5 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -6978,13 +6978,13 @@ void Parser::ParseParameterDeclarationClause( // // We care about case 1) where the declarator type should be known, and // the identifier should be null. - if (!ParmDeclarator.isInvalidType() && !ParmDeclarator.hasName()) { - if (Tok.getIdentifierInfo() && - Tok.getIdentifierInfo()->isKeyword(getLangOpts())) { - Diag(Tok, diag::err_keyword_as_parameter) << PP.getSpelling(Tok); - // Consume the keyword. - ConsumeToken(); - } + if (!ParmDeclarator.isInvalidType() && !ParmDeclarator.hasName() && + Tok.isNot(tok::raw_identifier) && !Tok.isAnnotation() && + Tok.getIdentifierInfo() && + Tok.getIdentifierInfo()->isKeyword(getLangOpts())) { + Diag(Tok, diag::err_keyword_as_parameter) << PP.getSpelling(Tok); + // Consume the keyword. + ConsumeToken(); } // Inform the actions module about the parameter declarator, so it gets // added to the current scope. 
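As a quick illustration of the parameter-name check adjusted in the ParseDecl.cpp hunk above: the tightened condition now queries getIdentifierInfo() only for ordinary identifier-bearing tokens (not raw identifiers or annotation tokens), while still catching a keyword used where a parameter name is expected. The following hypothetical C snippet is illustrative only and is not part of this commit:

  /* keyword-param.c -- hypothetical example */
  void f(int case);   /* 'case' is a keyword, so clang reports err_keyword_as_parameter */
  void g(int count);  /* an ordinary identifier as the parameter name is accepted */
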
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index bb8718671bb0..292ab03e8614 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -2108,6 +2108,9 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { CoawaitLoc = SourceLocation(); } + if (CoawaitLoc.isValid() && getLangOpts().CPlusPlus20) + Diag(CoawaitLoc, diag::warn_deprecated_for_co_await); + // We need to perform most of the semantic analysis for a C++0x for-range // statememt before parsing the body, in order to be able to deduce the type // of an auto-typed loop variable. diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 8544a4fccf4c..b4dcc9759b99 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -464,7 +464,7 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) { // No more CFGElements in the block? if (ri == re) { const Stmt *Term = B.getTerminatorStmt(); - if (Term && isa<CXXTryStmt>(Term)) { + if (Term && (isa<CXXTryStmt>(Term) || isa<ObjCAtTryStmt>(Term))) { HasAbnormalEdge = true; continue; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a0f6702a5f82..33e2b3b5027d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5297,6 +5297,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: case AtomicExpr::AO__atomic_load: return OrderingCABI != llvm::AtomicOrderingCABI::release && @@ -5304,6 +5305,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: return OrderingCABI != llvm::AtomicOrderingCABI::consume && @@ -5380,6 +5382,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, "need to update code for modified C11 atomics"); bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init && Op <= AtomicExpr::AO__opencl_atomic_fetch_max; + bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_load && + Op <= AtomicExpr::AO__hip_atomic_fetch_max; bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init && Op <= AtomicExpr::AO__c11_atomic_fetch_min) || IsOpenCL; @@ -5397,6 +5401,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__opencl_atomic_load: + case AtomicExpr::AO__hip_atomic_load: case AtomicExpr::AO__atomic_load_n: Form = Load; break; @@ -5407,11 +5412,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__opencl_atomic_store: + case AtomicExpr::AO__hip_atomic_store: case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_store_n: Form = Copy; break; - + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_max: case AtomicExpr::AO__c11_atomic_fetch_add: case AtomicExpr::AO__c11_atomic_fetch_sub: case AtomicExpr::AO__opencl_atomic_fetch_add: @@ -5426,6 +5434,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_fetch_and: case 
AtomicExpr::AO__c11_atomic_fetch_or: case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_xor: case AtomicExpr::AO__c11_atomic_fetch_nand: case AtomicExpr::AO__opencl_atomic_fetch_and: case AtomicExpr::AO__opencl_atomic_fetch_or: @@ -5452,6 +5463,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, break; case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: case AtomicExpr::AO__opencl_atomic_exchange: case AtomicExpr::AO__atomic_exchange_n: Form = Xchg; @@ -5463,8 +5475,10 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, case AtomicExpr::AO__c11_atomic_compare_exchange_strong: case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: Form = C11CmpXchg; break; @@ -5475,7 +5489,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, } unsigned AdjustedNumArgs = NumArgs[Form]; - if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init) + if ((IsOpenCL || IsHIP) && Op != AtomicExpr::AO__opencl_atomic_init) ++AdjustedNumArgs; // Check we have the right number of arguments. if (Args.size() < AdjustedNumArgs) { @@ -5532,8 +5546,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, // For an arithmetic operation, the implied arithmetic must be well-formed. if (Form == Arithmetic) { - // GCC does not enforce these rules for GNU atomics, but we do, because if - // we didn't it would be very confusing. FIXME: For whom? How so? + // GCC does not enforce these rules for GNU atomics, but we do to help catch + // trivial type errors. auto IsAllowedValueType = [&](QualType ValType) { if (ValType->isIntegerType()) return true; @@ -5574,8 +5588,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, if (!IsC11 && !AtomTy.isTriviallyCopyableType(Context) && !AtomTy->isScalarType()) { // For GNU atomics, require a trivially-copyable type. This is not part of - // the GNU atomics specification, but we enforce it, because if we didn't it - // would be very confusing. FIXME: For whom? How so? + // the GNU atomics specification but we enforce it for consistency with + // other atomics which generally all require a trivially-copyable type. This + // is because atomics just copy bits. Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_trivial_copy) << Ptr->getType() << Ptr->getSourceRange(); return ExprError(); @@ -5614,7 +5629,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, // arguments are actually passed as pointers. 
QualType ByValType = ValType; // 'CP' bool IsPassedByAddress = false; - if (!IsC11 && !IsN) { + if (!IsC11 && !IsHIP && !IsN) { ByValType = Ptr->getType(); IsPassedByAddress = true; } @@ -5793,11 +5808,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, if ((Op == AtomicExpr::AO__c11_atomic_load || Op == AtomicExpr::AO__c11_atomic_store || Op == AtomicExpr::AO__opencl_atomic_load || - Op == AtomicExpr::AO__opencl_atomic_store ) && + Op == AtomicExpr::AO__hip_atomic_load || + Op == AtomicExpr::AO__opencl_atomic_store || + Op == AtomicExpr::AO__hip_atomic_store) && Context.AtomicUsesUnsupportedLibcall(AE)) Diag(AE->getBeginLoc(), diag::err_atomic_load_store_uses_lib) << ((Op == AtomicExpr::AO__c11_atomic_load || - Op == AtomicExpr::AO__opencl_atomic_load) + Op == AtomicExpr::AO__opencl_atomic_load || + Op == AtomicExpr::AO__hip_atomic_load) ? 0 : 1); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index af174ac1ca1a..7be71ca49ea2 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10268,13 +10268,9 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S, const FunctionDecl *FD, const FunctionDecl *CausedFD, MultiVersionKind MVType) { - bool IsCPUSpecificCPUDispatchMVType = - MVType == MultiVersionKind::CPUDispatch || - MVType == MultiVersionKind::CPUSpecific; - const auto Diagnose = [FD, CausedFD, IsCPUSpecificCPUDispatchMVType]( - Sema &S, const Attr *A) { + const auto Diagnose = [FD, CausedFD, MVType](Sema &S, const Attr *A) { S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr) - << IsCPUSpecificCPUDispatchMVType << A; + << static_cast<unsigned>(MVType) << A; if (CausedFD) S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here); return true; @@ -10292,6 +10288,10 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S, if (MVType != MultiVersionKind::Target) return Diagnose(S, A); break; + case attr::TargetClones: + if (MVType != MultiVersionKind::TargetClones) + return Diagnose(S, A); + break; default: if (!AttrCompatibleWithMultiVersion(A->getKind(), MVType)) return Diagnose(S, A); @@ -10318,6 +10318,7 @@ bool Sema::areMultiversionVariantFunctionsCompatible( DefaultedFuncs = 6, ConstexprFuncs = 7, ConstevalFuncs = 8, + Lambda = 9, }; enum Different { CallingConv = 0, @@ -10445,7 +10446,7 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD, S.PDiag(diag::note_multiversioning_caused_here)), PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_doesnt_support) - << IsCPUSpecificCPUDispatchMVType), + << static_cast<unsigned>(MVType)), PartialDiagnosticAt(NewFD->getLocation(), S.PDiag(diag::err_multiversion_diff)), /*TemplatesSupported=*/false, @@ -10574,21 +10575,30 @@ static bool CheckTargetCausesMultiVersioning( return false; } +static bool MultiVersionTypesCompatible(MultiVersionKind Old, + MultiVersionKind New) { + if (Old == New || Old == MultiVersionKind::None || + New == MultiVersionKind::None) + return true; + + return (Old == MultiVersionKind::CPUDispatch && + New == MultiVersionKind::CPUSpecific) || + (Old == MultiVersionKind::CPUSpecific && + New == MultiVersionKind::CPUDispatch); +} + /// Check the validity of a new function declaration being added to an existing /// multiversioned declaration collection. 
static bool CheckMultiVersionAdditionalDecl( Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD, MultiVersionKind NewMVType, const TargetAttr *NewTA, const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec, - bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious, - LookupResult &Previous) { + const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl, + bool &MergeTypeWithPrevious, LookupResult &Previous) { MultiVersionKind OldMVType = OldFD->getMultiVersionKind(); // Disallow mixing of multiversioning types. - if ((OldMVType == MultiVersionKind::Target && - NewMVType != MultiVersionKind::Target) || - (NewMVType == MultiVersionKind::Target && - OldMVType != MultiVersionKind::Target)) { + if (!MultiVersionTypesCompatible(OldMVType, NewMVType)) { S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); S.Diag(OldFD->getLocation(), diag::note_previous_declaration); NewFD->setInvalidDecl(); @@ -10613,7 +10623,12 @@ static bool CheckMultiVersionAdditionalDecl( if (S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules)) continue; - if (NewMVType == MultiVersionKind::Target) { + switch (NewMVType) { + case MultiVersionKind::None: + assert(OldMVType == MultiVersionKind::TargetClones && + "Only target_clones can be omitted in subsequent declarations"); + break; + case MultiVersionKind::Target: { const auto *CurTA = CurFD->getAttr<TargetAttr>(); if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) { NewFD->setIsMultiVersion(); @@ -10629,7 +10644,30 @@ static bool CheckMultiVersionAdditionalDecl( NewFD->setInvalidDecl(); return true; } - } else { + break; + } + case MultiVersionKind::TargetClones: { + const auto *CurClones = CurFD->getAttr<TargetClonesAttr>(); + Redeclaration = true; + OldDecl = CurFD; + MergeTypeWithPrevious = true; + NewFD->setIsMultiVersion(); + + if (CurClones && NewClones && + (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() || + !std::equal(CurClones->featuresStrs_begin(), + CurClones->featuresStrs_end(), + NewClones->featuresStrs_begin()))) { + S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match); + S.Diag(CurFD->getLocation(), diag::note_previous_declaration); + NewFD->setInvalidDecl(); + return true; + } + + return false; + } + case MultiVersionKind::CPUSpecific: + case MultiVersionKind::CPUDispatch: { const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>(); const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>(); // Handle CPUDispatch/CPUSpecific versions. @@ -10684,8 +10722,8 @@ static bool CheckMultiVersionAdditionalDecl( } } } - // If the two decls aren't the same MVType, there is no possible error - // condition. + break; + } } } @@ -10721,7 +10759,6 @@ static bool CheckMultiVersionAdditionalDecl( return false; } - /// Check the validity of a mulitversion function declaration. /// Also sets the multiversion'ness' of the function itself. /// @@ -10735,23 +10772,14 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD, const auto *NewTA = NewFD->getAttr<TargetAttr>(); const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>(); const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>(); - - // Mixing Multiversioning types is prohibited. 
- if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) || - (NewCPUDisp && NewCPUSpec)) { - S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed); - NewFD->setInvalidDecl(); - return true; - } - - MultiVersionKind MVType = NewFD->getMultiVersionKind(); + const auto *NewClones = NewFD->getAttr<TargetClonesAttr>(); + MultiVersionKind MVType = NewFD->getMultiVersionKind(); // Main isn't allowed to become a multiversion function, however it IS // permitted to have 'main' be marked with the 'target' optimization hint. if (NewFD->isMain()) { - if ((MVType == MultiVersionKind::Target && NewTA->isDefaultVersion()) || - MVType == MultiVersionKind::CPUDispatch || - MVType == MultiVersionKind::CPUSpecific) { + if (MVType != MultiVersionKind::None && + !(MVType == MultiVersionKind::Target && !NewTA->isDefaultVersion())) { S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main); NewFD->setInvalidDecl(); return true; @@ -10774,13 +10802,35 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD, if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::None) return false; - if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None) { + // Multiversioned redeclarations aren't allowed to omit the attribute, except + // for target_clones. + if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None && + OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) { S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl) << (OldFD->getMultiVersionKind() != MultiVersionKind::Target); NewFD->setInvalidDecl(); return true; } + if (!OldFD->isMultiVersion()) { + switch (MVType) { + case MultiVersionKind::Target: + return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, + Redeclaration, OldDecl, + MergeTypeWithPrevious, Previous); + case MultiVersionKind::TargetClones: + if (OldFD->isUsed(false)) { + NewFD->setInvalidDecl(); + return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used); + } + OldFD->setIsMultiVersion(); + break; + case MultiVersionKind::CPUDispatch: + case MultiVersionKind::CPUSpecific: + case MultiVersionKind::None: + break; + } + } // Handle the target potentially causes multiversioning case. if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::Target) return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA, @@ -10791,8 +10841,8 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD, // appropriate attribute in the current function decl. Resolve that these are // still compatible with previous declarations. return CheckMultiVersionAdditionalDecl( - S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration, - OldDecl, MergeTypeWithPrevious, Previous); + S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewClones, + Redeclaration, OldDecl, MergeTypeWithPrevious, Previous); } /// Perform semantic checking of a new function declaration. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ef889a36bd55..4df8687aff89 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -1965,6 +1965,28 @@ static void handleRestrictAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } static void handleCPUSpecificAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // Ensure we don't combine these with themselves, since that causes some + // confusing behavior. 
+ if (AL.getParsedKind() == ParsedAttr::AT_CPUDispatch) { + if (checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL)) + return; + + if (const auto *Other = D->getAttr<CPUDispatchAttr>()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + } else if (AL.getParsedKind() == ParsedAttr::AT_CPUSpecific) { + if (checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL)) + return; + + if (const auto *Other = D->getAttr<CPUSpecificAttr>()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + } + FunctionDecl *FD = cast<FunctionDecl>(D); if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) { @@ -3211,54 +3233,57 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) { bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) { enum FirstParam { Unsupported, Duplicate, Unknown }; enum SecondParam { None, Architecture, Tune }; + enum ThirdParam { Target, TargetClones }; if (AttrStr.contains("fpmath=")) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "fpmath="; + << Unsupported << None << "fpmath=" << Target; // Diagnose use of tune if target doesn't support it. if (!Context.getTargetInfo().supportsTargetAttributeTune() && AttrStr.contains("tune=")) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "tune="; + << Unsupported << None << "tune=" << Target; ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr); if (!ParsedAttrs.Architecture.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unknown << Architecture << ParsedAttrs.Architecture; + << Unknown << Architecture << ParsedAttrs.Architecture << Target; if (!ParsedAttrs.Tune.empty() && !Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unknown << Tune << ParsedAttrs.Tune; + << Unknown << Tune << ParsedAttrs.Tune << Target; if (ParsedAttrs.DuplicateArchitecture) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "arch="; + << Duplicate << None << "arch=" << Target; if (ParsedAttrs.DuplicateTune) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Duplicate << None << "tune="; + << Duplicate << None << "tune=" << Target; for (const auto &Feature : ParsedAttrs.Features) { auto CurFeature = StringRef(Feature).drop_front(); // remove + or -. 
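(Editorial, assumed examples of attribute strings the checks above diagnose; the new third diagnostic argument marks them as coming from 'target' rather than 'target_clones'.)

__attribute__((target("fpmath=sse")))                void a();  // warns: unsupported 'fpmath='
__attribute__((target("arch=haswell,arch=skylake"))) void b();  // warns: duplicate 'arch='
__attribute__((target("tune=notacpu")))              void c();  // warns: unknown tune CPU
__attribute__((target("avx2,sse4.2")))               void d();  // accepted feature list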
if (!Context.getTargetInfo().isValidFeatureName(CurFeature)) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << CurFeature; + << Unsupported << None << CurFeature << Target; } TargetInfo::BranchProtectionInfo BPI; - StringRef Error; - if (!ParsedAttrs.BranchProtection.empty() && - !Context.getTargetInfo().validateBranchProtection( - ParsedAttrs.BranchProtection, BPI, Error)) { - if (Error.empty()) + StringRef DiagMsg; + if (ParsedAttrs.BranchProtection.empty()) + return false; + if (!Context.getTargetInfo().validateBranchProtection( + ParsedAttrs.BranchProtection, BPI, DiagMsg)) { + if (DiagMsg.empty()) return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) - << Unsupported << None << "branch-protection"; - else - return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec) - << Error; + << Unsupported << None << "branch-protection" << Target; + return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec) + << DiagMsg; } + if (!DiagMsg.empty()) + Diag(LiteralLoc, diag::warn_unsupported_branch_protection_spec) << DiagMsg; return false; } @@ -3274,6 +3299,107 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(NewAttr); } +bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str, + const StringLiteral *Literal, + bool &HasDefault, bool &HasCommas, + SmallVectorImpl<StringRef> &Strings) { + enum FirstParam { Unsupported, Duplicate, Unknown }; + enum SecondParam { None, Architecture, Tune }; + enum ThirdParam { Target, TargetClones }; + HasCommas = HasCommas || Str.contains(','); + // Warn on empty at the beginning of a string. + if (Str.size() == 0) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + + std::pair<StringRef, StringRef> Parts = {{}, Str}; + while (!Parts.second.empty()) { + Parts = Parts.second.split(','); + StringRef Cur = Parts.first.trim(); + SourceLocation CurLoc = Literal->getLocationOfByte( + Cur.data() - Literal->getString().data(), getSourceManager(), + getLangOpts(), Context.getTargetInfo()); + + bool DefaultIsDupe = false; + if (Cur.empty()) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + + if (Cur.startswith("arch=")) { + if (!Context.getTargetInfo().isValidCPUName( + Cur.drop_front(sizeof("arch=") - 1))) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << Architecture + << Cur.drop_front(sizeof("arch=") - 1) << TargetClones; + } else if (Cur == "default") { + DefaultIsDupe = HasDefault; + HasDefault = true; + } else if (!Context.getTargetInfo().isValidFeatureName(Cur)) + return Diag(CurLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << Cur << TargetClones; + + if (llvm::find(Strings, Cur) != Strings.end() || DefaultIsDupe) + Diag(CurLoc, diag::warn_target_clone_duplicate_options); + // Note: Add even if there are duplicates, since it changes name mangling. + Strings.push_back(Cur); + } + + if (Str.rtrim().endswith(",")) + return Diag(LiteralLoc, diag::warn_unsupported_target_attribute) + << Unsupported << None << "" << TargetClones; + return false; +} + +static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // Ensure we don't combine these with themselves, since that causes some + // confusing behavior. 
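(Editorial sketch of the spellings checkTargetClonesAttrString and its caller accept or flag; assumed examples.)

__attribute__((target_clones("sse4.2,avx2,default")))        void f();  // one comma-separated literal
__attribute__((target_clones("sse4.2", "avx2", "default")))  void g();  // separate literals
__attribute__((target_clones("sse4.2,avx2", "default")))     void h();  // mixed forms: warned
__attribute__((target_clones("avx2", "avx2", "default")))    void i();  // duplicate: warned, both kept for mangling
// __attribute__((target_clones("avx2"))) void j();                     // error: no "default" entry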
+ if (const auto *Other = D->getAttr<TargetClonesAttr>()) { + S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL; + S.Diag(Other->getLocation(), diag::note_conflicting_attribute); + return; + } + if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL)) + return; + + SmallVector<StringRef, 2> Strings; + bool HasCommas = false, HasDefault = false; + + for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) { + StringRef CurStr; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) || + S.checkTargetClonesAttrString( + LiteralLoc, CurStr, + cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()), + HasDefault, HasCommas, Strings)) + return; + } + + if (HasCommas && AL.getNumArgs() > 1) + S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values); + + if (!HasDefault) { + S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default); + return; + } + + // FIXME: We could probably figure out how to get this to work for lambdas + // someday. + if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) { + if (MD->getParent()->isLambda()) { + S.Diag(D->getLocation(), diag::err_multiversion_doesnt_support) + << static_cast<unsigned>(MultiVersionKind::TargetClones) + << /*Lambda*/ 9; + return; + } + } + + cast<FunctionDecl>(D)->setIsMultiVersion(); + TargetClonesAttr *NewAttr = ::new (S.Context) + TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size()); + D->addAttr(NewAttr); +} + static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) { Expr *E = AL.getArgAsExpr(0); uint32_t VecWidth; @@ -8217,6 +8343,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_Target: handleTargetAttr(S, D, AL); break; + case ParsedAttr::AT_TargetClones: + handleTargetClonesAttr(S, D, AL); + break; case ParsedAttr::AT_MinVectorWidth: handleMinVectorWidthAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 8592335e20d3..b305d4e5b92f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -16566,6 +16566,17 @@ Sema::PushExpressionEvaluationContext( ExpressionEvaluationContextRecord::ExpressionKind ExprContext) { ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup, LambdaContextDecl, ExprContext); + + // Discarded statements and immediate contexts nested in other + // discarded statements or immediate context are themselves + // a discarded statement or an immediate context, respectively. + ExprEvalContexts.back().InDiscardedStatement = + ExprEvalContexts[ExprEvalContexts.size() - 2] + .isDiscardedStatementContext(); + ExprEvalContexts.back().InImmediateFunctionContext = + ExprEvalContexts[ExprEvalContexts.size() - 2] + .isImmediateFunctionContext(); + Cleanup.reset(); if (!MaybeODRUseExprs.empty()) std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs); @@ -18965,6 +18976,10 @@ bool Sema::DiagIfReachable(SourceLocation Loc, ArrayRef<const Stmt *> Stmts, /// during overload resolution or within sizeof/alignof/typeof/typeid. 
bool Sema::DiagRuntimeBehavior(SourceLocation Loc, ArrayRef<const Stmt*> Stmts, const PartialDiagnostic &PD) { + + if (ExprEvalContexts.back().isDiscardedStatementContext()) + return false; + switch (ExprEvalContexts.back().Context) { case ExpressionEvaluationContext::Unevaluated: case ExpressionEvaluationContext::UnevaluatedList: diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 635252584562..d25f329f85e4 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1508,8 +1508,9 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, } // Only construct objects with object types. - // There doesn't seem to be an explicit rule for this but functions are - // not objects, so they cannot take initializers. + // The standard doesn't explicitly forbid function types here, but that's an + // obvious oversight, as there's no way to dynamically construct a function + // in general. if (Ty->isFunctionType()) return ExprError(Diag(TyBeginLoc, diag::err_init_for_function_type) << Ty << FullRange); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 3c820829864d..1d90759f2406 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3563,8 +3563,7 @@ StmtResult Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, bool HasDeducedReturnType = CurLambda && hasDeducedReturnType(CurLambda->CallOperator); - if (ExprEvalContexts.back().Context == - ExpressionEvaluationContext::DiscardedStatement && + if (ExprEvalContexts.back().isDiscardedStatementContext() && (HasDeducedReturnType || CurCap->HasImplicitReturnType)) { if (RetValExp) { ExprResult ER = @@ -3880,8 +3879,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, if (RetVal.isInvalid()) return StmtError(); StmtResult R = BuildReturnStmt(ReturnLoc, RetVal.get()); - if (R.isInvalid() || ExprEvalContexts.back().Context == - ExpressionEvaluationContext::DiscardedStatement) + if (R.isInvalid() || ExprEvalContexts.back().isDiscardedStatementContext()) return R; if (VarDecl *VD = @@ -3966,8 +3964,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { // C++1z: discarded return statements are not considered when deducing a // return type. - if (ExprEvalContexts.back().Context == - ExpressionEvaluationContext::DiscardedStatement && + if (ExprEvalContexts.back().isDiscardedStatementContext() && FnRetType->getContainedAutoType()) { if (RetValExp) { ExprResult ER = diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp index bc939d252800..d57bab154b61 100644 --- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -686,8 +686,8 @@ SwitchNodeBuilder::generateDefaultCaseNode(ProgramStateRef St, assert(Src->succ_rbegin() != Src->succ_rend()); CFGBlock *DefaultBlock = *Src->succ_rbegin(); - // Sanity check for default blocks that are unreachable and not caught - // by earlier stages. + // Basic correctness check for default blocks that are unreachable and not + // caught by earlier stages. 
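(Editorial note.) With nested evaluation contexts now inheriting the discarded/immediate property, and DiagRuntimeBehavior returning early in discarded statements, code in a discarded if constexpr branch no longer triggers runtime-behavior warnings, and, as in the SemaStmt changes, discarded returns stay out of return type deduction. A minimal assumed example, where the oversized shift stands in for the kind of diagnostic routed through DiagRuntimeBehavior:

void caller(int v) {
  if constexpr (false)
    (void)(v << 100);  // discarded statement: no shift-count warning expected here
}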
if (!DefaultBlock) return nullptr; diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 74403a160b8e..23c67c64f975 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -2191,6 +2191,42 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, Constraint->getMaxValue(), true); } +// Simplify the given symbol with the help of the SValBuilder. In +// SValBuilder::symplifySval, we traverse the symbol tree and query the +// constraint values for the sub-trees and if a value is a constant we do the +// constant folding. Compound symbols might collapse to simpler symbol tree +// that is still possible to further simplify. Thus, we do the simplification on +// a new symbol tree until we reach the simplest form, i.e. the fixpoint. +// +// Consider the following symbol `(b * b) * b * b` which has this tree: +// * +// / \ +// * b +// / \ +// / b +// (b * b) +// Now, if the `b * b == 1` new constraint is added then during the first +// iteration we have the following transformations: +// * * +// / \ / \ +// * b --> b b +// / \ +// / b +// 1 +// We need another iteration to reach the final result `1`. +LLVM_NODISCARD +static SVal simplifyUntilFixpoint(SValBuilder &SVB, ProgramStateRef State, + const SymbolRef Sym) { + SVal Val = SVB.makeSymbolVal(Sym); + SVal SimplifiedVal = SVB.simplifySVal(State, Val); + // Do the simplification until we can. + while (SimplifiedVal != Val) { + Val = SimplifiedVal; + SimplifiedVal = SVB.simplifySVal(State, Val); + } + return SimplifiedVal; +} + // Iterate over all symbols and try to simplify them. Once a symbol is // simplified then we check if we can merge the simplified symbol's equivalence // class to this class. This way, we simplify not just the symbols but the @@ -2202,7 +2238,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, SymbolSet ClassMembers = Class.getClassMembers(State); for (const SymbolRef &MemberSym : ClassMembers) { - const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym); + const SVal SimplifiedMemberVal = + simplifyUntilFixpoint(SVB, State, MemberSym); const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol(); // The symbol is collapsed to a constant, check if the current State is diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index 681a1f64eadc..4ca35dd06ae5 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -372,6 +372,15 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, NonLoc InputLHS = lhs; NonLoc InputRHS = rhs; + // Constraints may have changed since the creation of a bound SVal. Check if + // the values can be simplified based on those new constraints. + SVal simplifiedLhs = simplifySVal(state, lhs); + SVal simplifiedRhs = simplifySVal(state, rhs); + if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) + lhs = *simplifiedLhsAsNonLoc; + if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) + rhs = *simplifiedRhsAsNonLoc; + // Handle trivial case where left-side and right-side are the same. if (lhs == rhs) switch (op) { @@ -619,16 +628,6 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, } } - // Does the symbolic expression simplify to a constant? - // If so, "fold" the constant by setting 'lhs' to a ConcreteInt - // and try again. 
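(Editorial illustration of why the fixpoint loop matters, using an assumed piece of analyzed code.)

// With the constraint b*b == 1, ((b*b)*b)*b collapses to b*b after one
// simplification pass, and only the second pass folds it to the constant 1.
int analyzed(int b, int x) {
  int prod = b * b * b * b;
  if (b * b == 1)
    return prod + x;  // the analyzer can now treat 'prod' as 1 on this path
  return x;
}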
- SVal simplifiedLhs = simplifySVal(state, lhs); - if (simplifiedLhs != lhs) - if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) { - lhs = *simplifiedLhsAsNonLoc; - continue; - } - // Is the RHS a constant? if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) return MakeSymIntVal(Sym, op, *RHSValue, resultTy); @@ -1103,7 +1102,6 @@ const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, if (SymbolRef Sym = V.getAsSymbol()) return state->getConstraintManager().getSymVal(state, Sym); - // FIXME: Add support for SymExprs. return nullptr; } @@ -1135,6 +1133,24 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { return cache(Sym, SVB.makeSymbolVal(Sym)); } + // Return the known const value for the Sym if available, or return Undef + // otherwise. + SVal getConst(SymbolRef Sym) { + const llvm::APSInt *Const = + State->getConstraintManager().getSymVal(State, Sym); + if (Const) + return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const) + : (SVal)SVB.makeIntVal(*Const); + return UndefinedVal(); + } + + SVal getConstOrVisit(SymbolRef Sym) { + const SVal Ret = getConst(Sym); + if (Ret.isUndef()) + return Visit(Sym); + return Ret; + } + public: Simplifier(ProgramStateRef State) : State(State), SVB(State->getStateManager().getSValBuilder()) {} @@ -1148,15 +1164,14 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { return SVB.makeSymbolVal(S); } - // TODO: Support SymbolCast. Support IntSymExpr when/if we actually - // start producing them. + // TODO: Support SymbolCast. SVal VisitSymIntExpr(const SymIntExpr *S) { auto I = Cached.find(S); if (I != Cached.end()) return I->second; - SVal LHS = Visit(S->getLHS()); + SVal LHS = getConstOrVisit(S->getLHS()); if (isUnchanged(S->getLHS(), LHS)) return skip(S); @@ -1183,6 +1198,20 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); } + SVal VisitIntSymExpr(const IntSymExpr *S) { + auto I = Cached.find(S); + if (I != Cached.end()) + return I->second; + + SVal RHS = getConstOrVisit(S->getRHS()); + if (isUnchanged(S->getRHS(), RHS)) + return skip(S); + + SVal LHS = SVB.makeIntVal(S->getLHS()); + return cache( + S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); + } + SVal VisitSymSymExpr(const SymSymExpr *S) { auto I = Cached.find(S); if (I != Cached.end()) @@ -1196,8 +1225,9 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { Loc::isLocType(S->getRHS()->getType())) return skip(S); - SVal LHS = Visit(S->getLHS()); - SVal RHS = Visit(S->getRHS()); + SVal LHS = getConstOrVisit(S->getLHS()); + SVal RHS = getConstOrVisit(S->getRHS()); + if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS)) return skip(S); diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp index 31de49033ac2..f692c68045ee 100644 --- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp +++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp @@ -591,16 +591,24 @@ AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) { // - Main source file: run both path-sensitive and non-path-sensitive checks. // - Header files: run non-path-sensitive checks only. // - System headers: don't run any checks. - SourceManager &SM = Ctx->getSourceManager(); - const Stmt *Body = D->getBody(); - SourceLocation SL = Body ? 
Body->getBeginLoc() : D->getLocation(); - SL = SM.getExpansionLoc(SL); - - if (!Opts->AnalyzeAll && !Mgr->isInCodeFile(SL)) { - if (SL.isInvalid() || SM.isInSystemHeader(SL)) - return AM_None; + if (Opts->AnalyzeAll) + return Mode; + + const SourceManager &SM = Ctx->getSourceManager(); + + const SourceLocation Loc = [&SM](Decl *D) -> SourceLocation { + const Stmt *Body = D->getBody(); + SourceLocation SL = Body ? Body->getBeginLoc() : D->getLocation(); + return SM.getExpansionLoc(SL); + }(D); + + // Ignore system headers. + if (Loc.isInvalid() || SM.isInSystemHeader(Loc)) + return AM_None; + + // Disable path sensitive analysis in user-headers. + if (!Mgr->isInCodeFile(Loc)) return Mode & ~AM_Path; - } return Mode; } diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 40e8bd2b8776..f7c711690d7e 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -129,7 +129,7 @@ DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) { /// /// This is kinda hacky, it would be better if we knew what kind of file Clang /// was expecting instead. -static bool shouldMinimize(StringRef Filename) { +static bool shouldMinimizeBasedOnExtension(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) return true; // C++ standard library @@ -147,26 +147,43 @@ static bool shouldCacheStatFailures(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) return false; // This may be the module cache directory. - return shouldMinimize(Filename); // Only cache stat failures on source files. + // Only cache stat failures on source files. 
+ return shouldMinimizeBasedOnExtension(Filename); } -void DependencyScanningWorkerFilesystem::ignoreFile(StringRef RawFilename) { +void DependencyScanningWorkerFilesystem::disableMinimization( + StringRef RawFilename) { llvm::SmallString<256> Filename; llvm::sys::path::native(RawFilename, Filename); - IgnoredFiles.insert(Filename); + NotToBeMinimized.insert(Filename); } -bool DependencyScanningWorkerFilesystem::shouldIgnoreFile( - StringRef RawFilename) { +bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) { + if (!shouldMinimizeBasedOnExtension(RawFilename)) + return false; + llvm::SmallString<256> Filename; llvm::sys::path::native(RawFilename, Filename); - return IgnoredFiles.contains(Filename); + return !NotToBeMinimized.contains(Filename); +} + +CachedFileSystemEntry DependencyScanningWorkerFilesystem::createFileSystemEntry( + llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename, + bool ShouldMinimize) { + if (!MaybeStatus) + return CachedFileSystemEntry(MaybeStatus.getError()); + + if (MaybeStatus->isDirectory()) + return CachedFileSystemEntry::createDirectoryEntry(std::move(*MaybeStatus)); + + return CachedFileSystemEntry::createFileEntry(Filename, getUnderlyingFS(), + ShouldMinimize); } llvm::ErrorOr<const CachedFileSystemEntry *> DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( const StringRef Filename) { - bool ShouldMinimize = !shouldIgnoreFile(Filename) && shouldMinimize(Filename); + bool ShouldMinimize = shouldMinimize(Filename); if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize)) return Entry; @@ -182,23 +199,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value; if (!CacheEntry.isValid()) { - llvm::vfs::FileSystem &FS = getUnderlyingFS(); - auto MaybeStatus = FS.status(Filename); - if (!MaybeStatus) { - if (!shouldCacheStatFailures(Filename)) - // HACK: We need to always restat non source files if the stat fails. - // This is because Clang first looks up the module cache and module - // files before building them, and then looks for them again. If we - // cache the stat failure, it won't see them the second time. - return MaybeStatus.getError(); - else - CacheEntry = CachedFileSystemEntry(MaybeStatus.getError()); - } else if (MaybeStatus->isDirectory()) - CacheEntry = CachedFileSystemEntry::createDirectoryEntry( - std::move(*MaybeStatus)); - else - CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS, - ShouldMinimize); + auto MaybeStatus = getUnderlyingFS().status(Filename); + if (!MaybeStatus && !shouldCacheStatFailures(Filename)) + // HACK: We need to always restat non source files if the stat fails. + // This is because Clang first looks up the module cache and module + // files before building them, and then looks for them again. If we + // cache the stat failure, it won't see them the second time. + return MaybeStatus.getError(); + CacheEntry = createFileSystemEntry(std::move(MaybeStatus), Filename, + ShouldMinimize); } Result = &CacheEntry; diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp index 7fdc49271791..70bb6c5caf87 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp @@ -193,20 +193,19 @@ public: // Use the dependency scanning optimized file system if requested to do so. 
if (DepFS) { - DepFS->clearIgnoredFiles(); - // Ignore any files that contributed to prebuilt modules. The implicit - // build validates the modules by comparing the reported sizes of their - // inputs to the current state of the filesystem. Minimization would throw - // this mechanism off. + DepFS->enableMinimizationOfAllFiles(); + // Don't minimize any files that contributed to prebuilt modules. The + // implicit build validates the modules by comparing the reported sizes of + // their inputs to the current state of the filesystem. Minimization would + // throw this mechanism off. for (const auto &File : PrebuiltModulesInputFiles) - DepFS->ignoreFile(File.getKey()); - // Add any filenames that were explicity passed in the build settings and - // that might be opened, as we want to ensure we don't run source - // minimization on them. + DepFS->disableMinimization(File.getKey()); + // Don't minimize any files that were explicitly passed in the build + // settings and that might be opened. for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries) - DepFS->ignoreFile(E.Path); + DepFS->disableMinimization(E.Path); for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles) - DepFS->ignoreFile(F); + DepFS->disableMinimization(F); // Support for virtual file system overlays on top of the caching // filesystem. diff --git a/clang/utils/TableGen/ASTTableGen.cpp b/clang/utils/TableGen/ASTTableGen.cpp index 3f6da40964e0..6aa8b28a942f 100644 --- a/clang/utils/TableGen/ASTTableGen.cpp +++ b/clang/utils/TableGen/ASTTableGen.cpp @@ -107,7 +107,7 @@ static void visitASTNodeRecursive(ASTNode node, ASTNode base, static void visitHierarchy(RecordKeeper &records, StringRef nodeClassName, ASTNodeHierarchyVisitor<ASTNode> visit) { - // Check for the node class, just as a sanity check. + // Check for the node class, just as a basic correctness check. if (!records.getClass(nodeClassName)) { PrintFatalError(Twine("cannot find definition for node class ") + nodeClassName); diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc new file mode 100644 index 000000000000..d64227e4ba31 --- /dev/null +++ b/compiler-rt/include/profile/MemProfData.inc @@ -0,0 +1,61 @@ +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + + +#ifdef _MSC_VER +#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#else +#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. 
+#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. +PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + uint8_t BuildId[32]; +}); +} // namespace memprof +} // namespace llvm + +#endif diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index b28909152e20..2ff314a5a9cb 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -130,23 +130,24 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *) #define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name) // Strict init-order checking is dlopen-hostile: // https://github.com/google/sanitizers/issues/178 -#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \ - do { \ - if (flags()->strict_init_order) \ - StopInitOrderChecking(); \ - CheckNoDeepBind(filename, flag); \ - } while (false) -#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit() -#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) -#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() -#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited) -#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \ - if (AsanThread *t = GetCurrentThread()) { \ - *begin = t->tls_begin(); \ - *end = t->tls_end(); \ - } else { \ - *begin = *end = 0; \ - } +# define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \ + ({ \ + if (flags()->strict_init_order) \ + StopInitOrderChecking(); \ + CheckNoDeepBind(filename, flag); \ + REAL(dlopen)(filename, flag); \ + }) +# define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit() +# define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) +# define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() +# define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited) +# define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \ + if (AsanThread *t = GetCurrentThread()) { \ + *begin = t->tls_begin(); \ + *end = t->tls_end(); \ + } else { \ + *begin = *end = 0; \ + } #define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \ do { \ diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp index 1f266334b311..2a38fabaf220 100644 --- a/compiler-rt/lib/asan/asan_report.cpp +++ b/compiler-rt/lib/asan/asan_report.cpp @@ -460,6 +460,10 @@ static bool SuppressErrorReport(uptr pc) { void ReportGenericError(uptr pc, uptr bp, uptr sp, uptr addr, bool is_write, uptr access_size, u32 exp, bool fatal) { + if (__asan_test_only_reported_buggy_pointer) { + *__asan_test_only_reported_buggy_pointer = addr; + return; + } if (!fatal && SuppressErrorReport(pc)) return; ENABLE_FRAME_POINTER; diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index 1b150b393cfe..5be8ef0f6d1c 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -85,12 +85,8 @@ void ShowStatsAndAbort() { NOINLINE static void ReportGenericErrorWrapper(uptr addr, 
bool is_write, int size, int exp_arg, bool fatal) { - if (__asan_test_only_reported_buggy_pointer) { - *__asan_test_only_reported_buggy_pointer = addr; - } else { - GET_CALLER_PC_BP_SP; - ReportGenericError(pc, bp, sp, addr, is_write, size, exp_arg, fatal); - } + GET_CALLER_PC_BP_SP; + ReportGenericError(pc, bp, sp, addr, is_write, size, exp_arg, fatal); } // --------------- LowLevelAllocateCallbac ---------- {{{1 diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp index 95853208f951..65a10c999cc6 100644 --- a/compiler-rt/lib/cfi/cfi.cpp +++ b/compiler-rt/lib/cfi/cfi.cpp @@ -230,7 +230,7 @@ uptr find_cfi_check_in_dso(dl_phdr_info *info) { } if (symtab > strtab) { - VReport(1, "Can not handle: symtab > strtab (%p > %zx)\n", symtab, strtab); + VReport(1, "Can not handle: symtab > strtab (%zx > %zx)\n", symtab, strtab); return 0; } @@ -250,7 +250,7 @@ uptr find_cfi_check_in_dso(dl_phdr_info *info) { if (phdr_idx == info->dlpi_phnum) { // Nope, either different segments or just bogus pointers. // Can not handle this. - VReport(1, "Can not handle: symtab %p, strtab %zx\n", symtab, strtab); + VReport(1, "Can not handle: symtab %zx, strtab %zx\n", symtab, strtab); return 0; } diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp index 696f64d8c324..059ce283b8c9 100644 --- a/compiler-rt/lib/memprof/memprof_allocator.cpp +++ b/compiler-rt/lib/memprof/memprof_allocator.cpp @@ -252,6 +252,8 @@ struct Allocator { InsertLiveBlocks(); if (print_text) { + if (!flags()->print_terse) + Printf("Recorded MIBs (incl. live on exit):\n"); MIBMap.ForEach(PrintCallback, reinterpret_cast<void *>(flags()->print_terse)); StackDepotPrintAll(); @@ -271,9 +273,6 @@ struct Allocator { // Inserts any blocks which have been allocated but not yet deallocated. 
void InsertLiveBlocks() { - if (print_text && !flags()->print_terse) - Printf("Live on exit:\n"); - allocator.ForEachChunk( [](uptr chunk, void *alloc) { u64 user_requested_size; diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp index 5575ae2fe444..459ad03e8dfe 100644 --- a/compiler-rt/lib/memprof/memprof_interceptors.cpp +++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp @@ -93,10 +93,6 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *) do { \ } while (false) #define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name) -#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \ - do { \ - CheckNoDeepBind(filename, flag); \ - } while (false) #define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit() #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) #define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp index 96f315f95b24..c4800a6df34c 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp +++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -1,6 +1,12 @@ -#include "memprof_rawprofile.h" +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + #include "memprof_meminfoblock.h" +#include "memprof_rawprofile.h" +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_linux.h" #include "sanitizer_common/sanitizer_procmaps.h" #include "sanitizer_common/sanitizer_stackdepot.h" @@ -8,29 +14,12 @@ #include "sanitizer_common/sanitizer_stacktrace.h" #include "sanitizer_common/sanitizer_vector.h" -#include <stdlib.h> -#include <string.h> - namespace __memprof { using ::__sanitizer::Vector; +using SegmentEntry = ::llvm::memprof::SegmentEntry; +using Header = ::llvm::memprof::Header; namespace { -typedef struct __attribute__((__packed__)) { - u64 start; - u64 end; - u64 offset; - u8 buildId[32]; -} SegmentEntry; - -typedef struct __attribute__((__packed__)) { - u64 magic; - u64 version; - u64 total_size; - u64 segment_offset; - u64 mib_offset; - u64 stack_offset; -} Header; - template <class T> char *WriteBytes(T Pod, char *&Buffer) { *(T *)Buffer = Pod; return Buffer + sizeof(T); @@ -76,12 +65,12 @@ void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout, for (Layout.Reset(); Layout.Next(&segment);) { if (segment.IsReadable() && segment.IsExecutable()) { - SegmentEntry entry{}; - entry.start = segment.start; - entry.end = segment.end; - entry.offset = segment.offset; - memcpy(entry.buildId, segment.uuid, sizeof(segment.uuid)); - memcpy(Ptr, &entry, sizeof(SegmentEntry)); + SegmentEntry Entry{}; + Entry.Start = segment.start; + Entry.End = segment.end; + Entry.Offset = segment.offset; + memcpy(Entry.BuildId, segment.uuid, sizeof(segment.uuid)); + memcpy(Ptr, &Entry, sizeof(SegmentEntry)); Ptr += sizeof(SegmentEntry); NumSegmentsRecorded++; } @@ -89,7 +78,7 @@ void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout, // Store the number of segments we recorded in the space we reserved. 
*((u64 *)Buffer) = NumSegmentsRecorded; - CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) && + CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && "Expected num bytes != actual bytes written"); } @@ -144,7 +133,7 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds, *(u64 *)(Ptr - (Count + 1) * sizeof(u64)) = Count; } - CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) && + CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && "Expected num bytes != actual bytes written"); } @@ -172,7 +161,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, Ptr = WriteBytes((*h)->mib, Ptr); } - CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) && + CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) && "Expected num bytes != actual bytes written"); } @@ -193,11 +182,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, // BuildID 32B // ---------- // ... +// ---------- +// Optional Padding Bytes // ---------- MIB Info // Num Entries // ---------- MIB Entry // Alloc Count // ... +// ---------- +// Optional Padding Bytes // ---------- Stack Info // Num Entries // ---------- Stack Entry @@ -206,23 +199,29 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds, // PC2 // ... // ---------- +// Optional Padding Bytes // ... u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout, char *&Buffer) { - const u64 NumSegmentBytes = SegmentSizeBytes(Layout); + // Each section size is rounded up to 8b since the first entry in each section + // is a u64 which holds the number of entries in the section by convention. + const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Layout), 8); Vector<u64> StackIds; MIBMap.ForEach(RecordStackId, reinterpret_cast<void *>(&StackIds)); // The first 8b are for the total number of MIB records. Each MIB record is // preceded by a 8b stack id which is associated with stack frames in the next // section. - const u64 NumMIBInfoBytes = - sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)); + const u64 NumMIBInfoBytes = RoundUpTo( + sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8); - const u64 NumStackBytes = StackSizeBytes(StackIds); + const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8); - const u64 TotalSizeBytes = - sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes; + // Ensure that the profile is 8b aligned. We allow for some optional padding + // at the end so that any subsequent profile serialized to the same file does + // not incur unaligned accesses. + const u64 TotalSizeBytes = RoundUpTo( + sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8); // Allocate the memory for the entire buffer incl. info blocks. Buffer = (char *)InternalAlloc(TotalSizeBytes); diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h index 052bac3267f1..575104e7e34e 100644 --- a/compiler-rt/lib/memprof/memprof_rawprofile.h +++ b/compiler-rt/lib/memprof/memprof_rawprofile.h @@ -5,17 +5,10 @@ #include "sanitizer_common/sanitizer_procmaps.h" namespace __memprof { - -// TODO: pull these in from MemProfData.inc -#define MEMPROF_RAW_MAGIC_64 \ - (u64)255 << 56 | (u64)'m' << 48 | (u64)'p' << 40 | (u64)'r' << 32 | \ - (u64)'o' << 24 | (u64)'f' << 16 | (u64)'r' << 8 | (u64)129 - -#define MEMPROF_RAW_VERSION 1ULL - +// Serialize the in-memory representation of the memprof profile to the raw +// binary format. 
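(Editorial sketch of a consumer, assuming the Header layout and version constant introduced above; the names below are hypothetical and not part of the runtime.)

#include <cstdint>
#include <cstring>

struct RawHeader {
  uint64_t Magic, Version, TotalSize, SegmentOffset, MIBOffset, StackOffset;
};

// Returns true if Buf plausibly starts a version-1 raw memprof profile.
bool readRawHeader(const char *Buf, uint64_t Size, RawHeader &H) {
  if (Size < sizeof(RawHeader))
    return false;
  std::memcpy(&H, Buf, sizeof(RawHeader));  // all fields are u64; sections are 8-byte aligned
  return H.Version == 1 && H.TotalSize <= Size;
}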
The format itself is documented memprof_rawprofile.cpp. u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout, char *&Buffer); - } // namespace __memprof #endif // MEMPROF_RAWPROFILE_H_ diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp index 4404ab86092e..829e18370737 100644 --- a/compiler-rt/lib/memprof/tests/rawprofile.cpp +++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp @@ -1,6 +1,10 @@ #include "memprof/memprof_rawprofile.h" +#include <cstdint> +#include <memory> + #include "memprof/memprof_meminfoblock.h" +#include "profile/MemProfData.inc" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_procmaps.h" #include "sanitizer_common/sanitizer_stackdepot.h" @@ -8,8 +12,6 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include <memory> - namespace { using ::__memprof::MemInfoBlock; @@ -47,6 +49,8 @@ u64 PopulateFakeMap(const MemInfoBlock &FakeMIB, uptr StackPCBegin, template <class T = u64> T Read(char *&Buffer) { static_assert(std::is_pod<T>::value, "Must be a POD type."); + assert(reinterpret_cast<size_t>(Buffer) % sizeof(T) == 0 && + "Unaligned read!"); T t = *reinterpret_cast<T *>(Buffer); Buffer += sizeof(T); return t; @@ -101,8 +105,9 @@ TEST(MemProf, Basic) { const u64 MIBOffset = Read(Ptr); const u64 StackOffset = Read(Ptr); - // ============= Check sizes. + // ============= Check sizes and padding. EXPECT_EQ(TotalSize, NumBytes); + EXPECT_EQ(TotalSize % 8, 0ULL); // Should be equal to the size of the raw profile header. EXPECT_EQ(SegmentOffset, 48ULL); @@ -118,8 +123,10 @@ TEST(MemProf, Basic) { EXPECT_EQ(StackOffset, 336ULL); // We expect 2 stack entries, with 5 frames - 8b for total count, - // 2 * (8b for id, 8b for frame count and 5*8b for fake frames) - EXPECT_EQ(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8)); + // 2 * (8b for id, 8b for frame count and 5*8b for fake frames). + // Since this is the last section, there may be additional padding at the end + // to make the total profile size 8b aligned. + EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8)); // ============= Check contents. unsigned char ExpectedSegmentBytes[64] = { diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c index 674b1898b046..80db2527461e 100644 --- a/compiler-rt/lib/profile/InstrProfilingMerge.c +++ b/compiler-rt/lib/profile/InstrProfilingMerge.c @@ -34,7 +34,8 @@ uint64_t lprofGetLoadModuleSignature() { const __llvm_profile_data *FirstD = __llvm_profile_begin_data(); return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) + - (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version; + (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version + + __llvm_profile_get_magic(); } /* Returns 1 if profile is not structurally compatible. */ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp index bcb7370a7906..af0b0949a88e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp @@ -24,66 +24,6 @@ namespace __sanitizer { const char *PrimaryAllocatorName = "SizeClassAllocator"; const char *SecondaryAllocatorName = "LargeMmapAllocator"; -// ThreadSanitizer for Go uses libc malloc/free. 
-#if defined(SANITIZER_USE_MALLOC) -# if SANITIZER_LINUX && !SANITIZER_ANDROID -extern "C" void *__libc_malloc(uptr size); -# if !SANITIZER_GO -extern "C" void *__libc_memalign(uptr alignment, uptr size); -# endif -extern "C" void *__libc_realloc(void *ptr, uptr size); -extern "C" void __libc_free(void *ptr); -# else -# include <stdlib.h> -# define __libc_malloc malloc -# if !SANITIZER_GO -static void *__libc_memalign(uptr alignment, uptr size) { - void *p; - uptr error = posix_memalign(&p, alignment, size); - if (error) return nullptr; - return p; -} -# endif -# define __libc_realloc realloc -# define __libc_free free -# endif - -static void *RawInternalAlloc(uptr size, InternalAllocatorCache *cache, - uptr alignment) { - (void)cache; -#if !SANITIZER_GO - if (alignment == 0) - return __libc_malloc(size); - else - return __libc_memalign(alignment, size); -#else - // Windows does not provide __libc_memalign/posix_memalign. It provides - // __aligned_malloc, but the allocated blocks can't be passed to free, - // they need to be passed to __aligned_free. InternalAlloc interface does - // not account for such requirement. Alignemnt does not seem to be used - // anywhere in runtime, so just call __libc_malloc for now. - DCHECK_EQ(alignment, 0); - return __libc_malloc(size); -#endif -} - -static void *RawInternalRealloc(void *ptr, uptr size, - InternalAllocatorCache *cache) { - (void)cache; - return __libc_realloc(ptr, size); -} - -static void RawInternalFree(void *ptr, InternalAllocatorCache *cache) { - (void)cache; - __libc_free(ptr); -} - -InternalAllocator *internal_allocator() { - return 0; -} - -#else // SANITIZER_GO || defined(SANITIZER_USE_MALLOC) - static ALIGNED(64) char internal_alloc_placeholder[sizeof(InternalAllocator)]; static atomic_uint8_t internal_allocator_initialized; static StaticSpinMutex internal_alloc_init_mu; @@ -135,8 +75,6 @@ static void RawInternalFree(void *ptr, InternalAllocatorCache *cache) { internal_allocator()->Deallocate(cache, ptr); } -#endif // SANITIZER_GO || defined(SANITIZER_USE_MALLOC) - static void NORETURN ReportInternalAllocatorOutOfMemory(uptr requested_size) { SetAllocatorOutOfMemory(); Report("FATAL: %s: internal allocator is out of memory trying to allocate " @@ -187,6 +125,16 @@ void InternalFree(void *addr, InternalAllocatorCache *cache) { RawInternalFree(addr, cache); } +void InternalAllocatorLock() NO_THREAD_SAFETY_ANALYSIS { + internal_allocator_cache_mu.Lock(); + internal_allocator()->ForceLock(); +} + +void InternalAllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS { + internal_allocator()->ForceUnlock(); + internal_allocator_cache_mu.Unlock(); +} + // LowLevelAllocator constexpr uptr kLowLevelAllocatorDefaultAlignment = 8; static uptr low_level_alloc_min_alignment = kLowLevelAllocatorDefaultAlignment; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h index 32849036fd04..38994736877a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h @@ -48,6 +48,8 @@ void *InternalReallocArray(void *p, uptr count, uptr size, void *InternalCalloc(uptr count, uptr size, InternalAllocatorCache *cache = nullptr); void InternalFree(void *p, InternalAllocatorCache *cache = nullptr); +void InternalAllocatorLock(); +void InternalAllocatorUnlock(); InternalAllocator *internal_allocator(); } // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h 
b/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 065154496eb5..6ec6bb4bd856 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -460,6 +460,10 @@ template <class T> constexpr T Max(T a, T b) { return a > b ? a : b; } +template <class T> +constexpr T Abs(T a) { + return a < 0 ? -a : a; +} template<class T> void Swap(T& a, T& b) { T tmp = a; a = b; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index abb38ccfa15d..d219734fa0a3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -21,7 +21,7 @@ // COMMON_INTERCEPTOR_FD_RELEASE // COMMON_INTERCEPTOR_FD_ACCESS // COMMON_INTERCEPTOR_SET_THREAD_NAME -// COMMON_INTERCEPTOR_ON_DLOPEN +// COMMON_INTERCEPTOR_DLOPEN // COMMON_INTERCEPTOR_ON_EXIT // COMMON_INTERCEPTOR_MUTEX_PRE_LOCK // COMMON_INTERCEPTOR_MUTEX_POST_LOCK @@ -206,9 +206,9 @@ extern const short *_tolower_tab_; COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \ common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) ) -#ifndef COMMON_INTERCEPTOR_ON_DLOPEN -#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \ - CheckNoDeepBind(filename, flag); +#ifndef COMMON_INTERCEPTOR_DLOPEN +#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \ + ({ CheckNoDeepBind(filename, flag); REAL(dlopen)(filename, flag); }) #endif #ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE @@ -6380,8 +6380,7 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) { void *ctx; COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag); if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0); - COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag); - void *res = REAL(dlopen)(filename, flag); + void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag); Symbolizer::GetOrInit()->InvalidateModuleList(); COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res); return res; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h index 3fa6af76ce29..046d77dddc9c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h @@ -226,28 +226,26 @@ class DenseMapBase { return FindAndConstruct(__sanitizer::move(Key)).second; } - /// Equality comparison for DenseMap. + /// Iterate over active entries of the container. /// - /// Iterates over elements of LHS confirming that each (key, value) pair in - /// LHS is also in RHS, and that no additional pairs are in RHS. Equivalent to - /// N calls to RHS.find and N value comparisons. Amortized complexity is - /// linear, worst case is O(N^2) (if every hash collides). - bool operator==(const DenseMapBase &RHS) const { - if (size() != RHS.size()) - return false; - + /// Function can return fast to stop the process. 
+ template <class Fn> + void forEach(Fn fn) { const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); for (auto *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { const KeyT K = P->getFirst(); if (!KeyInfoT::isEqual(K, EmptyKey) && !KeyInfoT::isEqual(K, TombstoneKey)) { - const auto *I = RHS.find(K); - if (!I || P->getSecond() != I->getSecond()) - return false; + if (!fn(*P)) + return; } } + } - return true; + template <class Fn> + void forEach(Fn fn) const { + const_cast<DenseMapBase *>(this)->forEach( + [&](const value_type &KV) { return fn(KV); }); } protected: @@ -524,6 +522,35 @@ class DenseMapBase { } }; +/// Equality comparison for DenseMap. +/// +/// Iterates over elements of LHS confirming that each (key, value) pair in LHS +/// is also in RHS, and that no additional pairs are in RHS. +/// Equivalent to N calls to RHS.find and N value comparisons. Amortized +/// complexity is linear, worst case is O(N^2) (if every hash collides). +template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT, + typename BucketT> +bool operator==( + const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS, + const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) { + if (LHS.size() != RHS.size()) + return false; + + bool R = true; + LHS.forEach( + [&](const typename DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, + BucketT>::value_type &KV) -> bool { + const auto *I = RHS.find(KV.first); + if (!I || I->second != KV.second) { + R = false; + return false; + } + return true; + }); + + return R; +} + /// Inequality comparison for DenseMap. /// /// Equivalent to !(LHS == RHS). See operator== for performance notes. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h index 85c6427906c1..f4640369ae58 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h @@ -18,7 +18,7 @@ namespace __sanitizer { namespace detail { /// Simplistic combination of 32-bit hash values into 32-bit hash values. 
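(Editorial usage sketch of the new forEach iteration, on which the relocated operator== is built; assumed example.)

#include "sanitizer_common/sanitizer_dense_map.h"

using namespace __sanitizer;

static uptr SumValues() {
  DenseMap<uptr, uptr> Sizes;
  Sizes[0x1000] = 32;
  Sizes[0x2000] = 64;
  uptr Total = 0;
  Sizes.forEach([&](const detail::DenseMapPair<uptr, uptr> &KV) {
    Total += KV.second;
    return true;  // return false to stop the walk early
  });
  return Total;  // 96
}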
-static inline unsigned combineHashValue(unsigned a, unsigned b) { +static constexpr unsigned combineHashValue(unsigned a, unsigned b) { u64 key = (u64)a << 32 | (u64)b; key += ~(key << 32); key ^= (key >> 22); @@ -37,18 +37,19 @@ template <typename KeyT, typename ValueT> struct DenseMapPair { KeyT first = {}; ValueT second = {}; - DenseMapPair() = default; - DenseMapPair(const KeyT &f, const ValueT &s) : first(f), second(s) {} + constexpr DenseMapPair() = default; + constexpr DenseMapPair(const KeyT &f, const ValueT &s) + : first(f), second(s) {} template <typename KeyT2, typename ValueT2> - DenseMapPair(KeyT2 &&f, ValueT2 &&s) + constexpr DenseMapPair(KeyT2 &&f, ValueT2 &&s) : first(__sanitizer::forward<KeyT2>(f)), second(__sanitizer::forward<ValueT2>(s)) {} - DenseMapPair(const DenseMapPair &other) = default; - DenseMapPair &operator=(const DenseMapPair &other) = default; - DenseMapPair(DenseMapPair &&other) = default; - DenseMapPair &operator=(DenseMapPair &&other) = default; + constexpr DenseMapPair(const DenseMapPair &other) = default; + constexpr DenseMapPair &operator=(const DenseMapPair &other) = default; + constexpr DenseMapPair(DenseMapPair &&other) = default; + constexpr DenseMapPair &operator=(DenseMapPair &&other) = default; KeyT &getFirst() { return first; } const KeyT &getFirst() const { return first; } @@ -60,8 +61,8 @@ struct DenseMapPair { template <typename T> struct DenseMapInfo { - // static inline T getEmptyKey(); - // static inline T getTombstoneKey(); + // static T getEmptyKey(); + // static T getTombstoneKey(); // static unsigned getHashValue(const T &Val); // static bool isEqual(const T &LHS, const T &RHS); }; @@ -79,43 +80,50 @@ struct DenseMapInfo<T *> { // "Log2MaxAlign bits of alignment"); static constexpr uptr Log2MaxAlign = 12; - static inline T *getEmptyKey() { + static constexpr T *getEmptyKey() { uptr Val = static_cast<uptr>(-1); Val <<= Log2MaxAlign; return reinterpret_cast<T *>(Val); } - static inline T *getTombstoneKey() { + static constexpr T *getTombstoneKey() { uptr Val = static_cast<uptr>(-2); Val <<= Log2MaxAlign; return reinterpret_cast<T *>(Val); } - static unsigned getHashValue(const T *PtrVal) { + static constexpr unsigned getHashValue(const T *PtrVal) { return (unsigned((uptr)PtrVal) >> 4) ^ (unsigned((uptr)PtrVal) >> 9); } - static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } + static constexpr bool isEqual(const T *LHS, const T *RHS) { + return LHS == RHS; + } }; // Provide DenseMapInfo for chars. template <> struct DenseMapInfo<char> { - static inline char getEmptyKey() { return ~0; } - static inline char getTombstoneKey() { return ~0 - 1; } - static unsigned getHashValue(const char &Val) { return Val * 37U; } + static constexpr char getEmptyKey() { return ~0; } + static constexpr char getTombstoneKey() { return ~0 - 1; } + static constexpr unsigned getHashValue(const char &Val) { return Val * 37U; } - static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; } + static constexpr bool isEqual(const char &LHS, const char &RHS) { + return LHS == RHS; + } }; // Provide DenseMapInfo for unsigned chars. 
template <> struct DenseMapInfo<unsigned char> { - static inline unsigned char getEmptyKey() { return ~0; } - static inline unsigned char getTombstoneKey() { return ~0 - 1; } - static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; } + static constexpr unsigned char getEmptyKey() { return ~0; } + static constexpr unsigned char getTombstoneKey() { return ~0 - 1; } + static constexpr unsigned getHashValue(const unsigned char &Val) { + return Val * 37U; + } - static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) { + static constexpr bool isEqual(const unsigned char &LHS, + const unsigned char &RHS) { return LHS == RHS; } }; @@ -123,11 +131,14 @@ struct DenseMapInfo<unsigned char> { // Provide DenseMapInfo for unsigned shorts. template <> struct DenseMapInfo<unsigned short> { - static inline unsigned short getEmptyKey() { return 0xFFFF; } - static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; } - static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; } + static constexpr unsigned short getEmptyKey() { return 0xFFFF; } + static constexpr unsigned short getTombstoneKey() { return 0xFFFF - 1; } + static constexpr unsigned getHashValue(const unsigned short &Val) { + return Val * 37U; + } - static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) { + static constexpr bool isEqual(const unsigned short &LHS, + const unsigned short &RHS) { return LHS == RHS; } }; @@ -135,11 +146,13 @@ struct DenseMapInfo<unsigned short> { // Provide DenseMapInfo for unsigned ints. template <> struct DenseMapInfo<unsigned> { - static inline unsigned getEmptyKey() { return ~0U; } - static inline unsigned getTombstoneKey() { return ~0U - 1; } - static unsigned getHashValue(const unsigned &Val) { return Val * 37U; } + static constexpr unsigned getEmptyKey() { return ~0U; } + static constexpr unsigned getTombstoneKey() { return ~0U - 1; } + static constexpr unsigned getHashValue(const unsigned &Val) { + return Val * 37U; + } - static bool isEqual(const unsigned &LHS, const unsigned &RHS) { + static constexpr bool isEqual(const unsigned &LHS, const unsigned &RHS) { return LHS == RHS; } }; @@ -147,14 +160,15 @@ struct DenseMapInfo<unsigned> { // Provide DenseMapInfo for unsigned longs. template <> struct DenseMapInfo<unsigned long> { - static inline unsigned long getEmptyKey() { return ~0UL; } - static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } + static constexpr unsigned long getEmptyKey() { return ~0UL; } + static constexpr unsigned long getTombstoneKey() { return ~0UL - 1L; } - static unsigned getHashValue(const unsigned long &Val) { + static constexpr unsigned getHashValue(const unsigned long &Val) { return (unsigned)(Val * 37UL); } - static bool isEqual(const unsigned long &LHS, const unsigned long &RHS) { + static constexpr bool isEqual(const unsigned long &LHS, + const unsigned long &RHS) { return LHS == RHS; } }; @@ -162,15 +176,15 @@ struct DenseMapInfo<unsigned long> { // Provide DenseMapInfo for unsigned long longs. 
template <> struct DenseMapInfo<unsigned long long> { - static inline unsigned long long getEmptyKey() { return ~0ULL; } - static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } + static constexpr unsigned long long getEmptyKey() { return ~0ULL; } + static constexpr unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } - static unsigned getHashValue(const unsigned long long &Val) { + static constexpr unsigned getHashValue(const unsigned long long &Val) { return (unsigned)(Val * 37ULL); } - static bool isEqual(const unsigned long long &LHS, - const unsigned long long &RHS) { + static constexpr bool isEqual(const unsigned long long &LHS, + const unsigned long long &RHS) { return LHS == RHS; } }; @@ -178,51 +192,59 @@ struct DenseMapInfo<unsigned long long> { // Provide DenseMapInfo for shorts. template <> struct DenseMapInfo<short> { - static inline short getEmptyKey() { return 0x7FFF; } - static inline short getTombstoneKey() { return -0x7FFF - 1; } - static unsigned getHashValue(const short &Val) { return Val * 37U; } - static bool isEqual(const short &LHS, const short &RHS) { return LHS == RHS; } + static constexpr short getEmptyKey() { return 0x7FFF; } + static constexpr short getTombstoneKey() { return -0x7FFF - 1; } + static constexpr unsigned getHashValue(const short &Val) { return Val * 37U; } + static constexpr bool isEqual(const short &LHS, const short &RHS) { + return LHS == RHS; + } }; // Provide DenseMapInfo for ints. template <> struct DenseMapInfo<int> { - static inline int getEmptyKey() { return 0x7fffffff; } - static inline int getTombstoneKey() { return -0x7fffffff - 1; } - static unsigned getHashValue(const int &Val) { return (unsigned)(Val * 37U); } + static constexpr int getEmptyKey() { return 0x7fffffff; } + static constexpr int getTombstoneKey() { return -0x7fffffff - 1; } + static constexpr unsigned getHashValue(const int &Val) { + return (unsigned)(Val * 37U); + } - static bool isEqual(const int &LHS, const int &RHS) { return LHS == RHS; } + static constexpr bool isEqual(const int &LHS, const int &RHS) { + return LHS == RHS; + } }; // Provide DenseMapInfo for longs. template <> struct DenseMapInfo<long> { - static inline long getEmptyKey() { + static constexpr long getEmptyKey() { return (1UL << (sizeof(long) * 8 - 1)) - 1UL; } - static inline long getTombstoneKey() { return getEmptyKey() - 1L; } + static constexpr long getTombstoneKey() { return getEmptyKey() - 1L; } - static unsigned getHashValue(const long &Val) { + static constexpr unsigned getHashValue(const long &Val) { return (unsigned)(Val * 37UL); } - static bool isEqual(const long &LHS, const long &RHS) { return LHS == RHS; } + static constexpr bool isEqual(const long &LHS, const long &RHS) { + return LHS == RHS; + } }; // Provide DenseMapInfo for long longs. 
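One practical payoff of switching these DenseMapInfo helpers from inline to constexpr (here and in the long long specialization that follows) is that the sentinel-key invariants become checkable at compile time. A minimal standalone sketch, mirroring rather than including the sanitizer header, with a hypothetical LongLongInfo name:

struct LongLongInfo {
  static constexpr long long getEmptyKey() { return 0x7fffffffffffffffLL; }
  static constexpr long long getTombstoneKey() { return -0x7fffffffffffffffLL - 1; }
  static constexpr unsigned getHashValue(const long long &v) { return (unsigned)(v * 37ULL); }
  static constexpr bool isEqual(const long long &a, const long long &b) { return a == b; }
};

// With constexpr members, these checks move from runtime CHECK()s to
// static_asserts evaluated at compile time.
static_assert(!LongLongInfo::isEqual(LongLongInfo::getEmptyKey(),
                                     LongLongInfo::getTombstoneKey()),
              "empty and tombstone keys must be distinct");
static_assert(LongLongInfo::getHashValue(1) == 37u,
              "hash values are usable in constant expressions");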
template <> struct DenseMapInfo<long long> { - static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; } - static inline long long getTombstoneKey() { + static constexpr long long getEmptyKey() { return 0x7fffffffffffffffLL; } + static constexpr long long getTombstoneKey() { return -0x7fffffffffffffffLL - 1; } - static unsigned getHashValue(const long long &Val) { + static constexpr unsigned getHashValue(const long long &Val) { return (unsigned)(Val * 37ULL); } - static bool isEqual(const long long &LHS, const long long &RHS) { + static constexpr bool isEqual(const long long &LHS, const long long &RHS) { return LHS == RHS; } }; @@ -234,22 +256,22 @@ struct DenseMapInfo<detail::DenseMapPair<T, U>> { using FirstInfo = DenseMapInfo<T>; using SecondInfo = DenseMapInfo<U>; - static inline Pair getEmptyKey() { + static constexpr Pair getEmptyKey() { return detail::DenseMapPair<T, U>(FirstInfo::getEmptyKey(), SecondInfo::getEmptyKey()); } - static inline Pair getTombstoneKey() { + static constexpr Pair getTombstoneKey() { return detail::DenseMapPair<T, U>(FirstInfo::getTombstoneKey(), SecondInfo::getTombstoneKey()); } - static unsigned getHashValue(const Pair &PairVal) { + static constexpr unsigned getHashValue(const Pair &PairVal) { return detail::combineHashValue(FirstInfo::getHashValue(PairVal.first), SecondInfo::getHashValue(PairVal.second)); } - static bool isEqual(const Pair &LHS, const Pair &RHS) { + static constexpr bool isEqual(const Pair &LHS, const Pair &RHS) { return FirstInfo::isEqual(LHS.first, RHS.first) && SecondInfo::isEqual(LHS.second, RHS.second); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp index c7b30d988365..9b5f6f1da1a1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp @@ -274,6 +274,15 @@ void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) { UNIMPLEMENTED(); } +bool MprotectNoAccess(uptr addr, uptr size) { + return _zx_vmar_protect(_zx_vmar_root_self(), 0, Addr, Size) == ZX_OK; +} + +bool MprotectReadOnly(uptr addr, uptr size) { + return _zx_vmar_protect(_zx_vmar_root_self(), ZX_VM_PERM_READ, Addr, Size) == + ZX_OK; +} + void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment, const char *mem_type) { CHECK_GE(size, GetPageSize()); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h b/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h new file mode 100644 index 000000000000..553550d29552 --- /dev/null +++ b/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h @@ -0,0 +1,87 @@ +//===-- sanitizer_leb128.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_LEB128_H +#define SANITIZER_LEB128_H + +#include "sanitizer_common.h" +#include "sanitizer_internal_defs.h" + +namespace __sanitizer { + +template <typename T, typename It> +It EncodeSLEB128(T value, It begin, It end) { + bool more; + do { + u8 byte = value & 0x7f; + // NOTE: this assumes that this signed shift is an arithmetic right shift. 
+ value >>= 7; + more = !((((value == 0) && ((byte & 0x40) == 0)) || + ((value == -1) && ((byte & 0x40) != 0)))); + if (more) + byte |= 0x80; + if (UNLIKELY(begin == end)) + break; + *(begin++) = byte; + } while (more); + return begin; +} + +template <typename T, typename It> +It DecodeSLEB128(It begin, It end, T* v) { + T value = 0; + unsigned shift = 0; + u8 byte; + do { + if (UNLIKELY(begin == end)) + return begin; + byte = *(begin++); + T slice = byte & 0x7f; + value |= slice << shift; + shift += 7; + } while (byte >= 128); + if (shift < 64 && (byte & 0x40)) + value |= (-1ULL) << shift; + *v = value; + return begin; +} + +template <typename T, typename It> +It EncodeULEB128(T value, It begin, It end) { + do { + u8 byte = value & 0x7f; + value >>= 7; + if (value) + byte |= 0x80; + if (UNLIKELY(begin == end)) + break; + *(begin++) = byte; + } while (value); + return begin; +} + +template <typename T, typename It> +It DecodeULEB128(It begin, It end, T* v) { + T value = 0; + unsigned shift = 0; + u8 byte; + do { + if (UNLIKELY(begin == end)) + return begin; + byte = *(begin++); + T slice = byte & 0x7f; + value += slice << shift; + shift += 7; + } while (byte >= 128); + *v = value; + return begin; +} + +} // namespace __sanitizer + +#endif // SANITIZER_LEB128_H diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 596037d77222..2d787332a445 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1760,6 +1760,8 @@ HandleSignalMode GetHandleSignalMode(int signum) { #if !SANITIZER_GO void *internal_start_thread(void *(*func)(void *arg), void *arg) { + if (&real_pthread_create == 0) + return nullptr; // Start the thread with signals blocked, otherwise it can steal user signals. ScopedBlockSignals block(nullptr); void *th; @@ -1768,7 +1770,8 @@ void *internal_start_thread(void *(*func)(void *arg), void *arg) { } void internal_join_thread(void *th) { - real_pthread_join(th, nullptr); + if (&real_pthread_join) + real_pthread_join(th, nullptr); } #else void *internal_start_thread(void *(*func)(void *), void *arg) { return 0; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index b67203d4c10e..f9b5c531aeee 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -265,30 +265,32 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp, static fd_t internal_spawn_impl(const char *argv[], const char *envp[], pid_t *pid) { - fd_t master_fd = kInvalidFd; - fd_t slave_fd = kInvalidFd; + fd_t primary_fd = kInvalidFd; + fd_t secondary_fd = kInvalidFd; auto fd_closer = at_scope_exit([&] { - internal_close(master_fd); - internal_close(slave_fd); + internal_close(primary_fd); + internal_close(secondary_fd); }); // We need a new pseudoterminal to avoid buffering problems. The 'atos' tool // in particular detects when it's talking to a pipe and forgets to flush the // output stream after sending a response. - master_fd = posix_openpt(O_RDWR); - if (master_fd == kInvalidFd) return kInvalidFd; + primary_fd = posix_openpt(O_RDWR); + if (primary_fd == kInvalidFd) + return kInvalidFd; - int res = grantpt(master_fd) || unlockpt(master_fd); + int res = grantpt(primary_fd) || unlockpt(primary_fd); if (res != 0) return kInvalidFd; // Use TIOCPTYGNAME instead of ptsname() to avoid threading problems. 
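Stepping back to the sanitizer_leb128.h header introduced above: the encoders follow the standard DWARF-style LEB128 format, seven payload bits per byte with the high bit as a continuation flag. A self-contained round-trip sketch of the unsigned variant, using hypothetical names rather than the sanitizer templates:

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> EncodeU(uint64_t value) {
  std::vector<uint8_t> out;
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value)
      byte |= 0x80;  // more bytes follow
    out.push_back(byte);
  } while (value);
  return out;
}

static uint64_t DecodeU(const std::vector<uint8_t> &in) {
  uint64_t value = 0;
  unsigned shift = 0;
  for (uint8_t byte : in) {
    value |= uint64_t(byte & 0x7f) << shift;
    shift += 7;
    if (!(byte & 0x80))
      break;
  }
  return value;
}

int main() {
  // Classic example: 624485 encodes as the three bytes 0xE5 0x8E 0x26.
  assert(EncodeU(624485).size() == 3);
  assert(DecodeU(EncodeU(624485)) == 624485);
  return 0;
}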
- char slave_pty_name[128]; - res = ioctl(master_fd, TIOCPTYGNAME, slave_pty_name); + char secondary_pty_name[128]; + res = ioctl(primary_fd, TIOCPTYGNAME, secondary_pty_name); if (res == -1) return kInvalidFd; - slave_fd = internal_open(slave_pty_name, O_RDWR); - if (slave_fd == kInvalidFd) return kInvalidFd; + secondary_fd = internal_open(secondary_pty_name, O_RDWR); + if (secondary_fd == kInvalidFd) + return kInvalidFd; // File descriptor actions posix_spawn_file_actions_t acts; @@ -299,9 +301,9 @@ static fd_t internal_spawn_impl(const char *argv[], const char *envp[], posix_spawn_file_actions_destroy(&acts); }); - res = posix_spawn_file_actions_adddup2(&acts, slave_fd, STDIN_FILENO) || - posix_spawn_file_actions_adddup2(&acts, slave_fd, STDOUT_FILENO) || - posix_spawn_file_actions_addclose(&acts, slave_fd); + res = posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDIN_FILENO) || + posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDOUT_FILENO) || + posix_spawn_file_actions_addclose(&acts, secondary_fd); if (res != 0) return kInvalidFd; // Spawn attributes @@ -326,14 +328,14 @@ static fd_t internal_spawn_impl(const char *argv[], const char *envp[], // Disable echo in the new terminal, disable CR. struct termios termflags; - tcgetattr(master_fd, &termflags); + tcgetattr(primary_fd, &termflags); termflags.c_oflag &= ~ONLCR; termflags.c_lflag &= ~ECHO; - tcsetattr(master_fd, TCSANOW, &termflags); + tcsetattr(primary_fd, TCSANOW, &termflags); - // On success, do not close master_fd on scope exit. - fd_t fd = master_fd; - master_fd = kInvalidFd; + // On success, do not close primary_fd on scope exit. + fd_t fd = primary_fd; + primary_fd = kInvalidFd; return fd; } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp index ad88e2bbbefc..b1c15d8c2834 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp @@ -14,78 +14,187 @@ namespace __sanitizer { -static constexpr u32 kStackSizeBits = 16; - -StackStore::Id StackStore::Store(const StackTrace &trace) { - uptr *stack_trace = Alloc(trace.size + 1); - CHECK_LT(trace.size, 1 << kStackSizeBits); - *stack_trace = trace.size + (trace.tag << kStackSizeBits); - internal_memcpy(stack_trace + 1, trace.trace, trace.size * sizeof(uptr)); - return reinterpret_cast<StackStore::Id>(stack_trace); +namespace { +struct StackTraceHeader { + static constexpr u32 kStackSizeBits = 8; + + u8 size; + u8 tag; + explicit StackTraceHeader(const StackTrace &trace) + : size(Min<uptr>(trace.size, (1u << 8) - 1)), tag(trace.tag) { + CHECK_EQ(trace.tag, static_cast<uptr>(tag)); + } + explicit StackTraceHeader(uptr h) + : size(h & ((1 << kStackSizeBits) - 1)), tag(h >> kStackSizeBits) {} + + uptr ToUptr() const { + return static_cast<uptr>(size) | (static_cast<uptr>(tag) << kStackSizeBits); + } +}; +} // namespace + +StackStore::Id StackStore::Store(const StackTrace &trace, uptr *pack) { + if (!trace.size && !trace.tag) + return 0; + StackTraceHeader h(trace); + uptr idx = 0; + *pack = 0; + uptr *stack_trace = Alloc(h.size + 1, &idx, pack); + *stack_trace = h.ToUptr(); + internal_memcpy(stack_trace + 1, trace.trace, h.size * sizeof(uptr)); + *pack += blocks_[GetBlockIdx(idx)].Stored(h.size + 1); + return OffsetToId(idx); } StackTrace StackStore::Load(Id id) { - const uptr *stack_trace = reinterpret_cast<const uptr *>(id); - uptr size = *stack_trace & ((1 << kStackSizeBits) - 1); - uptr tag = *stack_trace >> 
kStackSizeBits; - return StackTrace(stack_trace + 1, size, tag); + if (!id) + return {}; + uptr idx = IdToOffset(id); + uptr block_idx = GetBlockIdx(idx); + CHECK_LT(block_idx, ARRAY_SIZE(blocks_)); + const uptr *stack_trace = blocks_[block_idx].GetOrUnpack(); + if (!stack_trace) + return {}; + stack_trace += GetInBlockIdx(idx); + StackTraceHeader h(*stack_trace); + return StackTrace(stack_trace + 1, h.size, h.tag); } -uptr *StackStore::TryAlloc(uptr count) { - // Optimisic lock-free allocation, essentially try to bump the region ptr. +uptr StackStore::Allocated() const { + uptr next_block = GetBlockIdx( + RoundUpTo(atomic_load_relaxed(&total_frames_), kBlockSizeFrames)); + uptr res = 0; + for (uptr i = 0; i < next_block; ++i) res += blocks_[i].Allocated(); + return res + sizeof(*this); +} + +uptr *StackStore::Alloc(uptr count, uptr *idx, uptr *pack) { for (;;) { - uptr cmp = atomic_load(®ion_pos_, memory_order_acquire); - uptr end = atomic_load(®ion_end_, memory_order_acquire); - uptr size = count * sizeof(uptr); - if (cmp == 0 || cmp + size > end) - return nullptr; - if (atomic_compare_exchange_weak(®ion_pos_, &cmp, cmp + size, - memory_order_acquire)) - return reinterpret_cast<uptr *>(cmp); + // Optimisic lock-free allocation, essentially try to bump the + // total_frames_. + uptr start = atomic_fetch_add(&total_frames_, count, memory_order_relaxed); + uptr block_idx = GetBlockIdx(start); + uptr last_idx = GetBlockIdx(start + count - 1); + if (LIKELY(block_idx == last_idx)) { + // Fits into the a single block. + CHECK_LT(block_idx, ARRAY_SIZE(blocks_)); + *idx = start; + return blocks_[block_idx].GetOrCreate() + GetInBlockIdx(start); + } + + // Retry. We can't use range allocated in two different blocks. + CHECK_LE(count, kBlockSizeFrames); + uptr in_first = kBlockSizeFrames - GetInBlockIdx(start); + // Mark tail/head of these blocks as "stored".to avoid waiting before we can + // Pack(). + *pack += blocks_[block_idx].Stored(in_first); + *pack += blocks_[last_idx].Stored(count - in_first); } } -uptr *StackStore::Alloc(uptr count) { - // First, try to allocate optimisitically. - uptr *s = TryAlloc(count); - if (LIKELY(s)) - return s; - return RefillAndAlloc(count); +uptr StackStore::Pack(Compression type) { + uptr res = 0; + for (BlockInfo &b : blocks_) res += b.Pack(type); + return res; +} + +void StackStore::TestOnlyUnmap() { + for (BlockInfo &b : blocks_) b.TestOnlyUnmap(); + internal_memset(this, 0, sizeof(*this)); } -uptr *StackStore::RefillAndAlloc(uptr count) { - // If failed, lock, retry and alloc new superblock. +uptr *StackStore::BlockInfo::Get() const { + // Idiomatic double-checked locking uses memory_order_acquire here. But + // relaxed is find for us, justification is similar to + // TwoLevelMap::GetOrCreate. 
+ return reinterpret_cast<uptr *>(atomic_load_relaxed(&data_)); +} + +uptr *StackStore::BlockInfo::Create() { SpinMutexLock l(&mtx_); - for (;;) { - uptr *s = TryAlloc(count); - if (s) - return s; - atomic_store(®ion_pos_, 0, memory_order_relaxed); - uptr size = count * sizeof(uptr) + sizeof(BlockInfo); - uptr allocsz = RoundUpTo(Max<uptr>(size, 64u * 1024u), GetPageSizeCached()); - uptr mem = (uptr)MmapOrDie(allocsz, "stack depot"); - BlockInfo *new_block = (BlockInfo *)(mem + allocsz) - 1; - new_block->next = curr_; - new_block->ptr = mem; - new_block->size = allocsz; - curr_ = new_block; - - atomic_fetch_add(&mapped_size_, allocsz, memory_order_relaxed); - - allocsz -= sizeof(BlockInfo); - atomic_store(®ion_end_, mem + allocsz, memory_order_release); - atomic_store(®ion_pos_, mem, memory_order_release); + uptr *ptr = Get(); + if (!ptr) { + ptr = reinterpret_cast<uptr *>( + MmapNoReserveOrDie(kBlockSizeBytes, "StackStore")); + atomic_store(&data_, reinterpret_cast<uptr>(ptr), memory_order_release); } + return ptr; } -void StackStore::TestOnlyUnmap() { - while (curr_) { - uptr mem = curr_->ptr; - uptr allocsz = curr_->size; - curr_ = curr_->next; - UnmapOrDie((void *)mem, allocsz); +uptr *StackStore::BlockInfo::GetOrCreate() { + uptr *ptr = Get(); + if (LIKELY(ptr)) + return ptr; + return Create(); +} + +uptr *StackStore::BlockInfo::GetOrUnpack() { + SpinMutexLock l(&mtx_); + switch (state) { + case State::Storing: + state = State::Unpacked; + FALLTHROUGH; + case State::Unpacked: + return Get(); + case State::Packed: + break; } - internal_memset(this, 0, sizeof(*this)); + + uptr *ptr = Get(); + CHECK_NE(nullptr, ptr); + // Fake unpacking. + for (uptr i = 0; i < kBlockSizeFrames; ++i) ptr[i] = ~ptr[i]; + state = State::Unpacked; + return Get(); +} + +uptr StackStore::BlockInfo::Pack(Compression type) { + if (type == Compression::None) + return 0; + + SpinMutexLock l(&mtx_); + switch (state) { + case State::Unpacked: + case State::Packed: + return 0; + case State::Storing: + break; + } + + uptr *ptr = Get(); + if (!ptr || !Stored(0)) + return 0; + + // Fake packing. 
+ for (uptr i = 0; i < kBlockSizeFrames; ++i) ptr[i] = ~ptr[i]; + state = State::Packed; + return kBlockSizeBytes - kBlockSizeBytes / 10; +} + +uptr StackStore::BlockInfo::Allocated() const { + SpinMutexLock l(&mtx_); + switch (state) { + case State::Packed: + return kBlockSizeBytes / 10; + case State::Unpacked: + case State::Storing: + return kBlockSizeBytes; + } +} + +void StackStore::BlockInfo::TestOnlyUnmap() { + if (uptr *ptr = Get()) + UnmapOrDie(ptr, StackStore::kBlockSizeBytes); +} + +bool StackStore::BlockInfo::Stored(uptr n) { + return n + atomic_fetch_add(&stored_, n, memory_order_release) == + kBlockSizeFrames; +} + +bool StackStore::BlockInfo::IsPacked() const { + SpinMutexLock l(&mtx_); + return state == State::Packed; } } // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h index b5bbdccc20b1..e0bc4e9c4a45 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h @@ -10,6 +10,7 @@ #define SANITIZER_STACK_STORE_H #include "sanitizer_atomic.h" +#include "sanitizer_common.h" #include "sanitizer_internal_defs.h" #include "sanitizer_mutex.h" #include "sanitizer_stacktrace.h" @@ -17,32 +18,91 @@ namespace __sanitizer { class StackStore { + static constexpr uptr kBlockSizeFrames = 0x100000; + static constexpr uptr kBlockCount = 0x1000; + static constexpr uptr kBlockSizeBytes = kBlockSizeFrames * sizeof(uptr); + public: + enum class Compression : u8 { + None = 0, + Test, + }; + constexpr StackStore() = default; - using Id = uptr; + using Id = u32; // Enough for 2^32 * sizeof(uptr) bytes of traces. + static_assert(u64(kBlockCount) * kBlockSizeFrames == 1ull << (sizeof(Id) * 8), + ""); - Id Store(const StackTrace &trace); + Id Store(const StackTrace &trace, + uptr *pack /* number of blocks completed by this call */); StackTrace Load(Id id); - uptr Allocated() const { return atomic_load_relaxed(&mapped_size_); } + uptr Allocated() const; + + // Packs all blocks which don't expect any more writes. A block is going to be + // packed once. As soon trace from that block was requested, it will unpack + // and stay unpacked after that. + // Returns the number of released bytes. + uptr Pack(Compression type); void TestOnlyUnmap(); private: - uptr *Alloc(uptr count = 1); - uptr *TryAlloc(uptr count); - uptr *RefillAndAlloc(uptr count); - mutable StaticSpinMutex mtx_ = {}; // Protects alloc of new blocks. - atomic_uintptr_t region_pos_ = {}; // Region allocator for Node's. - atomic_uintptr_t region_end_ = {}; - atomic_uintptr_t mapped_size_ = {}; - - struct BlockInfo { - const BlockInfo *next; - uptr ptr; - uptr size; + friend class StackStoreTest; + static constexpr uptr GetBlockIdx(uptr frame_idx) { + return frame_idx / kBlockSizeFrames; + } + + static constexpr uptr GetInBlockIdx(uptr frame_idx) { + return frame_idx % kBlockSizeFrames; + } + + static constexpr uptr IdToOffset(Id id) { + CHECK_NE(id, 0); + return id - 1; // Avoid zero as id. + } + + static constexpr uptr OffsetToId(Id id) { + // This makes UINT32_MAX to 0 and it will be retrived as and empty stack. + // But this is not a problem as we will not be able to store anything after + // that anyway. + return id + 1; // Avoid zero as id. + } + + uptr *Alloc(uptr count, uptr *idx, uptr *pack); + + // Total number of allocated frames. + atomic_uintptr_t total_frames_ = {}; + + // Each block will hold pointer to exactly kBlockSizeFrames. 
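Before the BlockInfo class that follows, the indexing arithmetic in one place: a 32-bit Id covers 2^32 frames split across kBlockCount blocks of kBlockSizeFrames each, with 0 reserved to mean "no trace". A standalone restatement (the constants mirror the header; the helper names are hypothetical):

#include <cstdint>

constexpr uint64_t kBlockSizeFrames = 0x100000;  // frames per block
constexpr uint64_t kBlockCount = 0x1000;

constexpr uint64_t GetBlockIdx(uint64_t frame_idx) { return frame_idx / kBlockSizeFrames; }
constexpr uint64_t GetInBlockIdx(uint64_t frame_idx) { return frame_idx % kBlockSizeFrames; }
constexpr uint32_t OffsetToId(uint64_t offset) { return static_cast<uint32_t>(offset + 1); }  // 0 means "empty"
constexpr uint64_t IdToOffset(uint32_t id) { return uint64_t(id) - 1; }

static_assert(kBlockCount * kBlockSizeFrames == (1ull << 32),
              "a u32 Id addresses every frame");
static_assert(GetBlockIdx(IdToOffset(OffsetToId(kBlockSizeFrames))) == 1 &&
              GetInBlockIdx(IdToOffset(OffsetToId(kBlockSizeFrames))) == 0,
              "frame kBlockSizeFrames is the first slot of block 1");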
+ class BlockInfo { + atomic_uintptr_t data_; + // Counter to track store progress to know when we can Pack() the block. + atomic_uint32_t stored_; + // Protects alloc of new blocks. + mutable StaticSpinMutex mtx_; + + enum class State : u8 { + Storing = 0, + Packed, + Unpacked, + }; + State state GUARDED_BY(mtx_); + + uptr *Create(); + + public: + uptr *Get() const; + uptr *GetOrCreate(); + uptr *GetOrUnpack(); + uptr Pack(Compression type); + uptr Allocated() const; + void TestOnlyUnmap(); + bool Stored(uptr n); + bool IsPacked() const; }; - const BlockInfo *curr_ = nullptr; + + BlockInfo blocks_[kBlockCount] = {}; }; } // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp index e203b2cc4c89..527221b0c85c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp @@ -23,6 +23,7 @@ struct StackDepotNode { using hash_type = u64; hash_type stack_hash; u32 link; + StackStore::Id store_id; static const u32 kTabSizeLog = SANITIZER_ANDROID ? 16 : 20; @@ -53,11 +54,6 @@ static StackStore stackStore; typedef StackDepotBase<StackDepotNode, 1, StackDepotNode::kTabSizeLog> StackDepot; static StackDepot theDepot; -// Keep rarely accessed stack traces out of frequently access nodes to improve -// caching efficiency. -static TwoLevelMap<StackStore::Id, StackDepot::kNodesSize1, - StackDepot::kNodesSize2> - storeIds; // Keep mutable data out of frequently access nodes to improve caching // efficiency. static TwoLevelMap<atomic_uint32_t, StackDepot::kNodesSize1, @@ -73,17 +69,18 @@ void StackDepotHandle::inc_use_count_unsafe() { } uptr StackDepotNode::allocated() { - return stackStore.Allocated() + storeIds.MemoryUsage() + - useCounts.MemoryUsage(); + return stackStore.Allocated() + useCounts.MemoryUsage(); } void StackDepotNode::store(u32 id, const args_type &args, hash_type hash) { stack_hash = hash; - storeIds[id] = stackStore.Store(args); + uptr pack = 0; + store_id = stackStore.Store(args, &pack); + if (pack) + stackStore.Pack(StackStore::Compression::None); } StackDepotNode::args_type StackDepotNode::load(u32 id) const { - StackStore::Id store_id = storeIds[id]; if (!store_id) return {}; return stackStore.Load(store_id); @@ -121,7 +118,6 @@ StackDepotHandle StackDepotNode::get_handle(u32 id) { void StackDepotTestOnlyUnmap() { theDepot.TestOnlyUnmap(); - storeIds.TestOnlyUnmap(); stackStore.TestOnlyUnmap(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index 11c6154b09ea..aebd504669d2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -20,7 +20,7 @@ namespace __sanitizer { struct BufferedStackTrace; -static const u32 kStackTraceMax = 256; +static const u32 kStackTraceMax = 255; #if SANITIZER_LINUX && defined(__mips__) # define SANITIZER_CAN_FAST_UNWIND 0 diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp index c3607dbed23e..1a31ce02af4c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp @@ -337,6 +337,11 @@ bool MprotectNoAccess(uptr addr, uptr size) { return VirtualProtect((LPVOID)addr, size, PAGE_NOACCESS, &old_protection); } +bool MprotectReadOnly(uptr addr, uptr size) { + DWORD old_protection; + return 
VirtualProtect((LPVOID)addr, size, PAGE_READONLY, &old_protection); +} + void ReleaseMemoryPagesToOS(uptr beg, uptr end) { uptr beg_aligned = RoundDownTo(beg, GetPageSizeCached()), end_aligned = RoundDownTo(end, GetPageSizeCached()); diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h index fe0c1da31599..4712c2be1813 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_defs.h +++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h @@ -228,6 +228,7 @@ enum MutexType { MutexTypeFired, MutexTypeRacy, MutexTypeGlobalProc, + MutexTypeInternalAlloc, }; } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 25dbe487b280..cf3dc90d96a1 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -177,6 +177,7 @@ struct ThreadSignalContext { struct AtExitCtx { void (*f)(); void *arg; + uptr pc; }; // InterceptorContext holds all global data required for interceptors. @@ -367,7 +368,10 @@ TSAN_INTERCEPTOR(int, pause, int fake) { return BLOCK_REAL(pause)(fake); } -static void at_exit_wrapper() { +// Note: we specifically call the function in such strange way +// with "installed_at" because in reports it will appear between +// callback frames and the frame that installed the callback. +static void at_exit_callback_installed_at() { AtExitCtx *ctx; { // Ensure thread-safety. @@ -379,15 +383,21 @@ static void at_exit_wrapper() { interceptor_ctx()->AtExitStack.PopBack(); } - Acquire(cur_thread(), (uptr)0, (uptr)ctx); + ThreadState *thr = cur_thread(); + Acquire(thr, ctx->pc, (uptr)ctx); + FuncEntry(thr, ctx->pc); ((void(*)())ctx->f)(); + FuncExit(thr); Free(ctx); } -static void cxa_at_exit_wrapper(void *arg) { - Acquire(cur_thread(), 0, (uptr)arg); +static void cxa_at_exit_callback_installed_at(void *arg) { + ThreadState *thr = cur_thread(); AtExitCtx *ctx = (AtExitCtx*)arg; + Acquire(thr, ctx->pc, (uptr)arg); + FuncEntry(thr, ctx->pc); ((void(*)(void *arg))ctx->f)(ctx->arg); + FuncExit(thr); Free(ctx); } @@ -401,7 +411,7 @@ TSAN_INTERCEPTOR(int, atexit, void (*f)()) { // We want to setup the atexit callback even if we are in ignored lib // or after fork. SCOPED_INTERCEPTOR_RAW(atexit, f); - return setup_at_exit_wrapper(thr, pc, (void(*)())f, 0, 0); + return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, 0, 0); } #endif @@ -409,7 +419,7 @@ TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) { if (in_symbolizer()) return 0; SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso); - return setup_at_exit_wrapper(thr, pc, (void(*)())f, arg, dso); + return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, arg, dso); } static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), @@ -417,6 +427,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), auto *ctx = New<AtExitCtx>(); ctx->f = f; ctx->arg = arg; + ctx->pc = pc; Release(thr, pc, (uptr)ctx); // Memory allocation in __cxa_atexit will race with free during exit, // because we do not see synchronization around atexit callback list. @@ -432,25 +443,27 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(), // due to atexit_mu held on exit from the calloc interceptor. 
ScopedIgnoreInterceptors ignore; - res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_wrapper, 0, 0); + res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_callback_installed_at, + 0, 0); // Push AtExitCtx on the top of the stack of callback functions if (!res) { interceptor_ctx()->AtExitStack.PushBack(ctx); } } else { - res = REAL(__cxa_atexit)(cxa_at_exit_wrapper, ctx, dso); + res = REAL(__cxa_atexit)(cxa_at_exit_callback_installed_at, ctx, dso); } ThreadIgnoreEnd(thr); return res; } #if !SANITIZER_MAC && !SANITIZER_NETBSD -static void on_exit_wrapper(int status, void *arg) { +static void on_exit_callback_installed_at(int status, void *arg) { ThreadState *thr = cur_thread(); - uptr pc = 0; - Acquire(thr, pc, (uptr)arg); AtExitCtx *ctx = (AtExitCtx*)arg; + Acquire(thr, ctx->pc, (uptr)arg); + FuncEntry(thr, ctx->pc); ((void(*)(int status, void *arg))ctx->f)(status, ctx->arg); + FuncExit(thr); Free(ctx); } @@ -461,11 +474,12 @@ TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) { auto *ctx = New<AtExitCtx>(); ctx->f = (void(*)())f; ctx->arg = arg; + ctx->pc = GET_CALLER_PC(); Release(thr, pc, (uptr)ctx); // Memory allocation in __cxa_atexit will race with free during exit, // because we do not see synchronization around atexit callback list. ThreadIgnoreBegin(thr, pc); - int res = REAL(on_exit)(on_exit_wrapper, ctx); + int res = REAL(on_exit)(on_exit_callback_installed_at, ctx); ThreadIgnoreEnd(thr); return res; } @@ -2363,6 +2377,15 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, if (fd >= 0) FdClose(thr, pc, fd); \ } +#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \ + ({ \ + CheckNoDeepBind(filename, flag); \ + ThreadIgnoreBegin(thr, 0); \ + void *res = REAL(dlopen)(filename, flag); \ + ThreadIgnoreEnd(thr); \ + res; \ + }) + #define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \ libignore()->OnLibraryLoaded(filename) diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp index ef97ad0bc94e..a31bebcb6ba9 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp @@ -69,8 +69,17 @@ Allocator *allocator() { struct GlobalProc { Mutex mtx; Processor *proc; - - GlobalProc() : mtx(MutexTypeGlobalProc), proc(ProcCreate()) {} + // This mutex represents the internal allocator combined for + // the purposes of deadlock detection. The internal allocator + // uses multiple mutexes, moreover they are locked only occasionally + // and they are spin mutexes which don't support deadlock detection. + // So we use this fake mutex to serve as a substitute for these mutexes. 
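The member declared just below, internal_alloc_mtx, is that substitute. As a generic illustration of the pattern, with std::mutex standing in for TSan's CheckedMutex and hypothetical names:

#include <mutex>

// The internal allocator's own spin mutexes are invisible to deadlock
// detection, so a single checked mutex is locked and immediately unlocked
// around every internal allocation, making the allocator appear as one
// lockable resource in the lock-order graph.
static std::mutex g_internal_alloc_mtx;

static void InternalAllocAccess() {
  g_internal_alloc_mtx.lock();
  g_internal_alloc_mtx.unlock();
}

The fork path then takes this mutex together with the real allocator locks in ForkBefore and releases it in ForkParentAfter/ForkChildAfter, as in the tsan_rtl.cpp hunk further down.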
+ CheckedMutex internal_alloc_mtx; + + GlobalProc() + : mtx(MutexTypeGlobalProc), + proc(ProcCreate()), + internal_alloc_mtx(MutexTypeInternalAlloc) {} }; static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64); @@ -78,6 +87,11 @@ GlobalProc *global_proc() { return reinterpret_cast<GlobalProc*>(&global_proc_placeholder); } +static void InternalAllocAccess() { + global_proc()->internal_alloc_mtx.Lock(); + global_proc()->internal_alloc_mtx.Unlock(); +} + ScopedGlobalProcessor::ScopedGlobalProcessor() { GlobalProc *gp = global_proc(); ThreadState *thr = cur_thread(); @@ -110,6 +124,18 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() { gp->mtx.Unlock(); } +void AllocatorLock() NO_THREAD_SAFETY_ANALYSIS { + global_proc()->mtx.Lock(); + global_proc()->internal_alloc_mtx.Lock(); + InternalAllocatorLock(); +} + +void AllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS { + InternalAllocatorUnlock(); + global_proc()->internal_alloc_mtx.Unlock(); + global_proc()->mtx.Unlock(); +} + static constexpr uptr kMaxAllowedMallocSize = 1ull << 40; static uptr max_user_defined_malloc_size; @@ -342,6 +368,7 @@ void *Alloc(uptr sz) { thr->nomalloc = 0; // CHECK calls internal_malloc(). CHECK(0); } + InternalAllocAccess(); return InternalAlloc(sz, &thr->proc()->internal_alloc_cache); } @@ -351,6 +378,7 @@ void FreeImpl(void *p) { thr->nomalloc = 0; // CHECK calls internal_malloc(). CHECK(0); } + InternalAllocAccess(); InternalFree(p, &thr->proc()->internal_alloc_cache); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.h b/compiler-rt/lib/tsan/rtl/tsan_mman.h index efea5e5abdec..db8488eabbe2 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mman.h +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.h @@ -24,6 +24,8 @@ void ReplaceSystemMalloc(); void AllocatorProcStart(Processor *proc); void AllocatorProcFinish(Processor *proc); void AllocatorPrintStats(); +void AllocatorLock(); +void AllocatorUnlock(); // For user allocations. void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp index 3faa2d0c6192..1465f9953c19 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -25,6 +25,7 @@ #include "tsan_rtl.h" #include "tsan_flags.h" +#include <limits.h> #include <mach/mach.h> #include <pthread.h> #include <signal.h> @@ -45,70 +46,83 @@ namespace __tsan { #if !SANITIZER_GO -static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) { - atomic_uintptr_t *a = (atomic_uintptr_t *)dst; - void *val = (void *)atomic_load_relaxed(a); - atomic_signal_fence(memory_order_acquire); // Turns the previous load into - // acquire wrt signals. 
- if (UNLIKELY(val == nullptr)) { - val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); - CHECK(val); - void *cmp = nullptr; - if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val, - memory_order_acq_rel)) { - internal_munmap(val, size); - val = cmp; - } - } - return val; +static char main_thread_state[sizeof(ThreadState)] ALIGNED( + SANITIZER_CACHE_LINE_SIZE); +static ThreadState *dead_thread_state; +static pthread_key_t thread_state_key; + +// We rely on the following documented, but Darwin-specific behavior to keep the +// reference to the ThreadState object alive in TLS: +// pthread_key_create man page: +// If, after all the destructors have been called for all non-NULL values with +// associated destructors, there are still some non-NULL values with +// associated destructors, then the process is repeated. If, after at least +// [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for +// outstanding non-NULL values, there are still some non-NULL values with +// associated destructors, the implementation stops calling destructors. +static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations"); +static void ThreadStateDestructor(void *thr) { + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); } -// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is -// problematic, because there are several places where interceptors are called -// when TLVs are not accessible (early process startup, thread cleanup, ...). -// The following provides a "poor man's TLV" implementation, where we use the -// shadow memory of the pointer returned by pthread_self() to store a pointer to -// the ThreadState object. The main thread's ThreadState is stored separately -// in a static variable, because we need to access it even before the -// shadow memory is set up. -static uptr main_thread_identity = 0; -ALIGNED(64) static char main_thread_state[sizeof(ThreadState)]; -static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state; - -// We cannot use pthread_self() before libpthread has been initialized. Our -// current heuristic for guarding this is checking `main_thread_identity` which -// is only assigned in `__tsan::InitializePlatform`. -static ThreadState **cur_thread_location() { - if (main_thread_identity == 0) - return &main_thread_state_loc; - uptr thread_identity = (uptr)pthread_self(); - if (thread_identity == main_thread_identity) - return &main_thread_state_loc; - return (ThreadState **)MemToShadow(thread_identity); +static void InitializeThreadStateStorage() { + int res; + CHECK_EQ(thread_state_key, 0); + res = pthread_key_create(&thread_state_key, ThreadStateDestructor); + CHECK_EQ(res, 0); + res = pthread_setspecific(thread_state_key, main_thread_state); + CHECK_EQ(res, 0); + + auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState"); + dts->fast_state.SetIgnoreBit(); + dts->ignore_interceptors = 1; + dts->is_dead = true; + const_cast<Tid &>(dts->tid) = kInvalidTid; + res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ); // immutable + CHECK_EQ(res, 0); + dead_thread_state = dts; } ThreadState *cur_thread() { - return (ThreadState *)SignalSafeGetOrAllocate( - (uptr *)cur_thread_location(), sizeof(ThreadState)); + // Some interceptors get called before libpthread has been initialized and in + // these cases we must avoid calling any pthread APIs. 
+ if (UNLIKELY(!thread_state_key)) { + return (ThreadState *)main_thread_state; + } + + // We only reach this line after InitializeThreadStateStorage() ran, i.e, + // after TSan (and therefore libpthread) have been initialized. + ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key); + if (UNLIKELY(!thr)) { + thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState"); + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); + } + return thr; } void set_cur_thread(ThreadState *thr) { - *cur_thread_location() = thr; + int res = pthread_setspecific(thread_state_key, thr); + CHECK_EQ(res, 0); } -// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call -// munmap first and then clear `fake_tls`; if we receive a signal in between, -// handler will try to access the unmapped ThreadState. void cur_thread_finalize() { - ThreadState **thr_state_loc = cur_thread_location(); - if (thr_state_loc == &main_thread_state_loc) { + ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key); + CHECK(thr); + if (thr == (ThreadState *)main_thread_state) { // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to // exit the main thread. Let's keep the main thread's ThreadState. return; } - internal_munmap(*thr_state_loc, sizeof(ThreadState)); - *thr_state_loc = nullptr; + // Intercepted functions can still get called after cur_thread_finalize() + // (called from DestroyThreadState()), so put a fake thread state for "dead" + // threads. An alternative solution would be to release the ThreadState + // object from THREAD_DESTROY (which is delivered later and on the parent + // thread) instead of THREAD_TERMINATE. + int res = pthread_setspecific(thread_state_key, dead_thread_state); + CHECK_EQ(res, 0); + UnmapOrDie(thr, sizeof(ThreadState)); } #endif @@ -222,11 +236,10 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread, ThreadStart(thr, tid, GetTid(), ThreadType::Worker); } } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) { - if (thread == pthread_self()) { - ThreadState *thr = cur_thread(); - if (thr->tctx) { - DestroyThreadState(); - } + CHECK_EQ(thread, pthread_self()); + ThreadState *thr = cur_thread(); + if (thr->tctx) { + DestroyThreadState(); } } @@ -253,8 +266,7 @@ void InitializePlatform() { #if !SANITIZER_GO CheckAndProtect(); - CHECK_EQ(main_thread_identity, 0); - main_thread_identity = (uptr)pthread_self(); + InitializeThreadStateStorage(); prev_pthread_introspection_hook = pthread_introspection_hook_install(&my_pthread_introspection_hook); @@ -286,24 +298,11 @@ uptr ExtractLongJmpSp(uptr *env) { extern "C" void __tsan_tls_initialization() {} void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) { - // The pointer to the ThreadState object is stored in the shadow memory - // of the tls. 
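Stepping back to the pthread_key-based thread-state storage above: the scheme leans on the documented Darwin behavior that TLS destructors leaving a non-NULL value behind are re-run for up to PTHREAD_DESTRUCTOR_ITERATIONS rounds. A standalone sketch of that keep-alive trick (hypothetical names, not the TSan code itself):

#include <cassert>
#include <pthread.h>

static pthread_key_t g_state_key;

// Re-installing the value from the destructor keeps the slot readable for
// interceptors that run during later rounds of thread teardown.
static void KeepAliveDestructor(void *value) {
  pthread_setspecific(g_state_key, value);
}

static void InitStateKey() {
  int res = pthread_key_create(&g_state_key, KeepAliveDestructor);
  assert(res == 0);
  (void)res;
}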
- uptr tls_end = tls_addr + tls_size; - uptr thread_identity = (uptr)pthread_self(); const uptr pc = StackTrace::GetNextInstructionPc( reinterpret_cast<uptr>(__tsan_tls_initialization)); - if (thread_identity == main_thread_identity) { - MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size); - } else { - uptr thr_state_start = thread_identity; - uptr thr_state_end = thr_state_start + sizeof(uptr); - CHECK_GE(thr_state_start, tls_addr); - CHECK_LE(thr_state_start, tls_addr + tls_size); - CHECK_GE(thr_state_end, tls_addr); - CHECK_LE(thr_state_end, tls_addr + tls_size); - MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr); - MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end); - } + // Unlike Linux, we only store a pointer to the ThreadState object in TLS; + // just mark the entire range as written to. + MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size); } #endif diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index ff7726ef0608..c14af9788e32 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -34,6 +34,9 @@ extern "C" void __tsan_resume() { __tsan_resumed = 1; } +SANITIZER_WEAK_DEFAULT_IMPL +void __tsan_test_only_on_fork() {} + namespace __tsan { #if !SANITIZER_GO @@ -271,8 +274,39 @@ void DontNeedShadowFor(uptr addr, uptr size) { } #if !SANITIZER_GO +// We call UnmapShadow before the actual munmap, at that point we don't yet +// know if the provided address/size are sane. We can't call UnmapShadow +// after the actual munmap becuase at that point the memory range can +// already be reused for something else, so we can't rely on the munmap +// return value to understand is the values are sane. +// While calling munmap with insane values (non-canonical address, negative +// size, etc) is an error, the kernel won't crash. We must also try to not +// crash as the failure mode is very confusing (paging fault inside of the +// runtime on some derived shadow address). +static bool IsValidMmapRange(uptr addr, uptr size) { + if (size == 0) + return true; + if (static_cast<sptr>(size) < 0) + return false; + if (!IsAppMem(addr) || !IsAppMem(addr + size - 1)) + return false; + // Check that if the start of the region belongs to one of app ranges, + // end of the region belongs to the same region. + const uptr ranges[][2] = { + {LoAppMemBeg(), LoAppMemEnd()}, + {MidAppMemBeg(), MidAppMemEnd()}, + {HiAppMemBeg(), HiAppMemEnd()}, + }; + for (auto range : ranges) { + if (addr >= range[0] && addr < range[1]) + return addr + size <= range[1]; + } + return false; +} + void UnmapShadow(ThreadState *thr, uptr addr, uptr size) { - if (size == 0) return; + if (size == 0 || !IsValidMmapRange(addr, size)) + return; DontNeedShadowFor(addr, size); ScopedGlobalProcessor sgp; ctx->metamap.ResetRange(thr->proc(), addr, size); @@ -491,6 +525,7 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { ctx->thread_registry.Lock(); ctx->report_mtx.Lock(); ScopedErrorReportLock::Lock(); + AllocatorLock(); // Suppress all reports in the pthread_atfork callbacks. // Reports will deadlock on the report_mtx. // We could ignore sync operations as well, @@ -499,12 +534,20 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports++; // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and // we'll assert in CheckNoLocks() unless we ignore interceptors. 
+ // On OS X libSystem_atfork_prepare/parent/child callbacks are called + // after/before our callbacks and they call free. thr->ignore_interceptors++; + // Disables memory write in OnUserAlloc/Free. + thr->ignore_reads_and_writes++; + + __tsan_test_only_on_fork(); } void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports--; // Enabled in ForkBefore. thr->ignore_interceptors--; + thr->ignore_reads_and_writes--; + AllocatorUnlock(); ScopedErrorReportLock::Unlock(); ctx->report_mtx.Unlock(); ctx->thread_registry.Unlock(); @@ -514,6 +557,8 @@ void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports--; // Enabled in ForkBefore. thr->ignore_interceptors--; + thr->ignore_reads_and_writes--; + AllocatorUnlock(); ScopedErrorReportLock::Unlock(); ctx->report_mtx.Unlock(); ctx->thread_registry.Unlock(); @@ -747,14 +792,17 @@ using namespace __tsan; MutexMeta mutex_meta[] = { {MutexInvalid, "Invalid", {}}, {MutexThreadRegistry, "ThreadRegistry", {}}, - {MutexTypeTrace, "Trace", {MutexLeaf}}, - {MutexTypeReport, "Report", {MutexTypeSyncVar}}, - {MutexTypeSyncVar, "SyncVar", {}}, + {MutexTypeTrace, "Trace", {}}, + {MutexTypeReport, + "Report", + {MutexTypeSyncVar, MutexTypeGlobalProc, MutexTypeTrace}}, + {MutexTypeSyncVar, "SyncVar", {MutexTypeTrace}}, {MutexTypeAnnotations, "Annotations", {}}, {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}}, {MutexTypeFired, "Fired", {MutexLeaf}}, {MutexTypeRacy, "Racy", {MutexLeaf}}, {MutexTypeGlobalProc, "GlobalProc", {}}, + {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}}, {}, }; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp index 811695d144c5..f332a6a8d1d8 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -346,7 +346,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) { ThreadContext *tctx = FindThreadByTidLocked(b->tid); auto *loc = New<ReportLocation>(); loc->type = ReportLocationHeap; - loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr); + loc->heap_chunk_start = block_begin; loc->heap_chunk_size = b->siz; loc->external_tag = b->tag; loc->tid = tctx ? tctx->tid : b->tid; diff --git a/libcxx/CREDITS.TXT b/libcxx/CREDITS.TXT index 597c5fcb7cf4..fc442f4db1a1 100644 --- a/libcxx/CREDITS.TXT +++ b/libcxx/CREDITS.TXT @@ -149,6 +149,10 @@ N: Klaas de Vries E: klaas at klaasgaaf dot nl D: Minor bug fix. +N: Mark de Wever +E: koraq at xs4all dot nl +D: Format library support. + N: Zhang Xiongpang E: zhangxiongpang@gmail.com D: Minor patches and bug fixes. diff --git a/libcxx/include/__bit/byteswap.h b/libcxx/include/__bit/byteswap.h new file mode 100644 index 000000000000..970074ed98ce --- /dev/null +++ b/libcxx/include/__bit/byteswap.h @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___BIT_BYTESWAP_H +#define _LIBCPP___BIT_BYTESWAP_H + +#include <__concepts/arithmetic.h> +#include <__config> +#include <cstdint> +#include <cstdlib> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +template <integral _Tp> +_LIBCPP_HIDE_FROM_ABI constexpr _Tp byteswap(_Tp __val) noexcept { + + if constexpr (sizeof(_Tp) == 1) { + return __val; + } else if constexpr (sizeof(_Tp) == 2) { + return __builtin_bswap16(__val); + } else if constexpr (sizeof(_Tp) == 4) { + return __builtin_bswap32(__val); + } else if constexpr (sizeof(_Tp) == 8) { + return __builtin_bswap64(__val); +#ifndef _LIBCPP_HAS_NO_INT128 + } else if constexpr (sizeof(_Tp) == 16) { +#if __has_builtin(__builtin_bswap128) + return __builtin_bswap128(__val); +#else + return static_cast<_Tp>(byteswap(static_cast<uint64_t>(__val))) << 64 | + static_cast<_Tp>(byteswap(static_cast<uint64_t>(__val >> 64))); +#endif // __has_builtin(__builtin_bswap128) +#endif // _LIBCPP_HAS_NO_INT128 + } else { + static_assert(sizeof(_Tp) == 0, "byteswap is unimplemented for integral types of this size"); + } +} + +#endif // _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___BIT_BYTESWAP_H diff --git a/libcxx/include/__bsd_locale_fallbacks.h b/libcxx/include/__bsd_locale_fallbacks.h index 2d5c2eca4679..a5788d9777b5 100644 --- a/libcxx/include/__bsd_locale_fallbacks.h +++ b/libcxx/include/__bsd_locale_fallbacks.h @@ -108,7 +108,7 @@ size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len, } #endif -inline +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__format, ...) { va_list __va; va_start(__va, __format); @@ -118,7 +118,7 @@ int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__forma return __res; } -inline +inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l(char **__s, locale_t __l, const char *__format, ...) { va_list __va; va_start(__va, __format); @@ -128,7 +128,7 @@ int __libcpp_asprintf_l(char **__s, locale_t __l, const char *__format, ...) { return __res; } -inline +inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(const char *__s, locale_t __l, const char *__format, ...) { va_list __va; va_start(__va, __format); diff --git a/libcxx/include/__compare/partial_order.h b/libcxx/include/__compare/partial_order.h new file mode 100644 index 000000000000..ac8b405a4090 --- /dev/null +++ b/libcxx/include/__compare/partial_order.h @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_PARTIAL_ORDER +#define _LIBCPP___COMPARE_PARTIAL_ORDER + +#include <__compare/compare_three_way.h> +#include <__compare/ordering.h> +#include <__compare/weak_order.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __partial_order { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<2>) + noexcept(noexcept(partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) + noexcept(noexcept(partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()); } + }; +} // namespace __partial_order + +inline namespace __cpo { + inline constexpr auto partial_order = __partial_order::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___COMPARE_PARTIAL_ORDER diff --git a/libcxx/include/__compare/strong_order.h b/libcxx/include/__compare/strong_order.h new file mode 100644 index 000000000000..e49b2d45de45 --- /dev/null +++ b/libcxx/include/__compare/strong_order.h @@ -0,0 +1,136 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_STRONG_ORDER +#define _LIBCPP___COMPARE_STRONG_ORDER + +#include <__bit/bit_cast.h> +#include <__compare/compare_three_way.h> +#include <__compare/ordering.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <cmath> +#include <cstdint> +#include <limits> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __strong_order { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<2>) + noexcept(noexcept(strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up, class _Dp = decay_t<_Tp>> + requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp> + _LIBCPP_HIDE_FROM_ABI static constexpr strong_ordering + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) noexcept + { + if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int32_t)) { + int32_t __rx = _VSTD::bit_cast<int32_t>(__t); + int32_t __ry = _VSTD::bit_cast<int32_t>(__u); + __rx = (__rx < 0) ? (numeric_limits<int32_t>::min() - __rx - 1) : __rx; + __ry = (__ry < 0) ? (numeric_limits<int32_t>::min() - __ry - 1) : __ry; + return (__rx <=> __ry); + } else if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int64_t)) { + int64_t __rx = _VSTD::bit_cast<int64_t>(__t); + int64_t __ry = _VSTD::bit_cast<int64_t>(__u); + __rx = (__rx < 0) ? (numeric_limits<int64_t>::min() - __rx - 1) : __rx; + __ry = (__ry < 0) ? (numeric_limits<int64_t>::min() - __ry - 1) : __ry; + return (__rx <=> __ry); + } else if (__t < __u) { + return strong_ordering::less; + } else if (__t > __u) { + return strong_ordering::greater; + } else if (__t == __u) { + if constexpr (numeric_limits<_Dp>::radix == 2) { + return _VSTD::signbit(__u) <=> _VSTD::signbit(__t); + } else { + // This is bullet 3 of the IEEE754 algorithm, relevant + // only for decimal floating-point; + // see https://stackoverflow.com/questions/69068075/ + if (__t == 0 || _VSTD::isinf(__t)) { + return _VSTD::signbit(__u) <=> _VSTD::signbit(__t); + } else { + int __texp, __uexp; + (void)_VSTD::frexp(__t, &__texp); + (void)_VSTD::frexp(__u, &__uexp); + return (__t < 0) ? (__texp <=> __uexp) : (__uexp <=> __texp); + } + } + } else { + // They're unordered, so one of them must be a NAN. + // The order is -QNAN, -SNAN, numbers, +SNAN, +QNAN. 
+ bool __t_is_nan = _VSTD::isnan(__t); + bool __u_is_nan = _VSTD::isnan(__u); + bool __t_is_negative = _VSTD::signbit(__t); + bool __u_is_negative = _VSTD::signbit(__u); + using _IntType = std::conditional_t< + sizeof(__t) == sizeof(int32_t), int32_t, std::conditional_t< + sizeof(__t) == sizeof(int64_t), int64_t, void> + >; + if constexpr (std::is_same_v<_IntType, void>) { + static_assert(sizeof(_Dp) == 0, "std::strong_order is unimplemented for this floating-point type"); + } else if (__t_is_nan && __u_is_nan) { + // Order by sign bit, then by "payload bits" (we'll just use bit_cast). + if (__t_is_negative != __u_is_negative) { + return (__u_is_negative <=> __t_is_negative); + } else { + return _VSTD::bit_cast<_IntType>(__t) <=> _VSTD::bit_cast<_IntType>(__u); + } + } else if (__t_is_nan) { + return __t_is_negative ? strong_ordering::less : strong_ordering::greater; + } else { + return __u_is_negative ? strong_ordering::greater : strong_ordering::less; + } + } + } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()); } + }; +} // namespace __strong_order + +inline namespace __cpo { + inline constexpr auto strong_order = __strong_order::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___COMPARE_STRONG_ORDER diff --git a/libcxx/include/__compare/weak_order.h b/libcxx/include/__compare/weak_order.h new file mode 100644 index 000000000000..f67416ed3ebe --- /dev/null +++ b/libcxx/include/__compare/weak_order.h @@ -0,0 +1,100 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___COMPARE_WEAK_ORDER +#define _LIBCPP___COMPARE_WEAK_ORDER + +#include <__compare/compare_three_way.h> +#include <__compare/ordering.h> +#include <__compare/strong_order.h> +#include <__config> +#include <__utility/forward.h> +#include <__utility/priority_tag.h> +#include <cmath> +#include <type_traits> + +#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [cmp.alg] +namespace __weak_order { + struct __fn { + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<3>) + noexcept(noexcept(weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up, class _Dp = decay_t<_Tp>> + requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp> + _LIBCPP_HIDE_FROM_ABI static constexpr weak_ordering + __go(_Tp&& __t, _Up&& __u, __priority_tag<2>) noexcept + { + std::partial_ordering __po = (__t <=> __u); + if (__po == std::partial_ordering::less) { + return std::weak_ordering::less; + } else if (__po == std::partial_ordering::equivalent) { + return std::weak_ordering::equivalent; + } else if (__po == std::partial_ordering::greater) { + return std::weak_ordering::greater; + } else { + // Otherwise, at least one of them is a NaN. + bool __t_is_nan = _VSTD::isnan(__t); + bool __u_is_nan = _VSTD::isnan(__u); + bool __t_is_negative = _VSTD::signbit(__t); + bool __u_is_negative = _VSTD::signbit(__u); + if (__t_is_nan && __u_is_nan) { + return (__u_is_negative <=> __t_is_negative); + } else if (__t_is_nan) { + return __t_is_negative ? weak_ordering::less : weak_ordering::greater; + } else { + return __u_is_negative ? 
weak_ordering::greater : weak_ordering::less; + } + } + } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) + noexcept(noexcept(weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + requires is_same_v<decay_t<_Tp>, decay_t<_Up>> + _LIBCPP_HIDE_FROM_ABI static constexpr auto + __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) + noexcept(noexcept(weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))) + -> decltype( weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))) + { return weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); } + + template<class _Tp, class _Up> + _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const + noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>()))) + -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>())) + { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>()); } + }; +} // namespace __weak_order + +inline namespace __cpo { + inline constexpr auto weak_order = __weak_order::__fn{}; +} // namespace __cpo + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___COMPARE_WEAK_ORDER diff --git a/libcxx/include/__config b/libcxx/include/__config index dbf4383cd6e3..da03e877f753 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -74,10 +74,6 @@ # define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB # define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB # define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE -// Don't use a nullptr_t simulation type in C++03 instead using C++11 nullptr -// provided under the alternate keyword __nullptr, which changes the mangling -// of nullptr_t. This option is ABI incompatible with GCC in C++03 mode. -# define _LIBCPP_ABI_ALWAYS_USE_CXX11_NULLPTR // Define a key function for `bad_function_call` in the library, to centralize // its vtable and typeinfo to libc++ rather than having all other libraries // using that class define their own copies. @@ -127,6 +123,23 @@ # endif #endif +// By default, don't use a nullptr_t emulation type in C++03. +// +// This is technically an ABI break from previous releases, however it is +// very unlikely to impact anyone. If a user is impacted by this break, +// they can return to using the C++03 nullptr emulation by defining +// _LIBCPP_ABI_USE_CXX03_NULLPTR_EMULATION. +// +// This switch will be removed entirely in favour of never providing a +// C++03 emulation after one release. +// +// IMPORTANT: IF YOU ARE READING THIS AND YOU TURN THIS MACRO ON, PLEASE LEAVE +// A COMMENT ON https://reviews.llvm.org/D109459 OR YOU WILL BE BROKEN +// IN THE FUTURE WHEN WE REMOVE THE ABILITY TO USE THE C++03 EMULATION. +#ifndef _LIBCPP_ABI_USE_CXX03_NULLPTR_EMULATION +# define _LIBCPP_ABI_ALWAYS_USE_CXX11_NULLPTR +#endif + #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 // Enable additional explicit instantiations of iostreams components. 
This // reduces the number of weak definitions generated in programs that use @@ -1056,12 +1069,6 @@ typedef unsigned int char32_t; # define _LIBCPP_NODISCARD_AFTER_CXX17 #endif -#if !defined(_LIBCPP_DEBUG) && _LIBCPP_STD_VER > 11 -# define _LIBCPP_CONSTEXPR_IF_NODEBUG constexpr -#else -# define _LIBCPP_CONSTEXPR_IF_NODEBUG -#endif - #if __has_attribute(no_destroy) # define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) #else @@ -1376,10 +1383,12 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( #endif #if defined(__GNUC__) || defined(__clang__) -#define _LIBCPP_FORMAT_PRINTF(a, b) \ - __attribute__((__format__(__printf__, a, b))) + // The attribute uses 1-based indices for ordinary and static member functions. + // The attribute uses 2-based indices for non-static member functions. +# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ + __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) #else -#define _LIBCPP_FORMAT_PRINTF(a, b) +# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */ #endif #endif // __cplusplus diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h index f7a948950df2..d06859ee5f39 100644 --- a/libcxx/include/__iterator/reverse_iterator.h +++ b/libcxx/include/__iterator/reverse_iterator.h @@ -11,6 +11,8 @@ #define _LIBCPP___ITERATOR_REVERSE_ITERATOR_H #include <__config> +#include <__compare/compare_three_way_result.h> +#include <__compare/three_way_comparable.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> @@ -193,6 +195,16 @@ operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& return __x.base() >= __y.base(); } +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) +template <class _Iter1, three_way_comparable_with<_Iter1> _Iter2> +_LIBCPP_HIDE_FROM_ABI constexpr +compare_three_way_result_t<_Iter1, _Iter2> +operator<=>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y) +{ + return __y.base() <=> __x.base(); +} +#endif + #ifndef _LIBCPP_CXX03_LANG template <class _Iter1, class _Iter2> inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h index 28872f9fa41a..cfcc9857b3fc 100644 --- a/libcxx/include/__iterator/wrap_iter.h +++ b/libcxx/include/__iterator/wrap_iter.h @@ -40,120 +40,129 @@ public: private: iterator_type __i; public: - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT -#if _LIBCPP_STD_VER > 11 - : __i{} -#endif + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter() _NOEXCEPT + : __i() { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) __get_db()->__insert_i(this); #endif } - template <class _Up> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + template <class _Up> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(const __wrap_iter<_Up>& __u, typename enable_if<is_convertible<_Up, iterator_type>::value>::type* = nullptr) _NOEXCEPT : __i(__u.base()) { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) __get_db()->__iterator_copy(this, _VSTD::addressof(__u)); #endif } #if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(const __wrap_iter& __x) : 
__i(__x.base()) { + if (!__libcpp_is_constant_evaluated()) __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator=(const __wrap_iter& __x) { - if (this != _VSTD::addressof(__x)) + if (this != _VSTD::addressof(__x) && !__libcpp_is_constant_evaluated()) { __get_db()->__iterator_copy(this, _VSTD::addressof(__x)); __i = __x.__i; } return *this; } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 ~__wrap_iter() { + if (!__libcpp_is_constant_evaluated()) __get_db()->__erase_i(this); } #endif - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 reference operator*() const _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable iterator"); #endif return *__i; } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 pointer operator->() const _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to dereference a non-dereferenceable iterator"); #endif return _VSTD::__to_address(__i); } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator++() _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this), "Attempted to increment a non-incrementable iterator"); #endif ++__i; return *this; } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator++(int) _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator++(int) _NOEXCEPT {__wrap_iter __tmp(*this); ++(*this); return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator--() _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__decrementable(this), "Attempted to decrement a non-decrementable iterator"); #endif --__i; return *this; } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator--(int) _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator--(int) _NOEXCEPT {__wrap_iter __tmp(*this); --(*this); return __tmp;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator+ (difference_type __n) const _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator+ (difference_type __n) const _NOEXCEPT {__wrap_iter __w(*this); __w += __n; return __w;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator+=(difference_type __n) _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__addable(this, __n), "Attempted to add/subtract an iterator outside its valid range"); #endif __i += __n; return *this; } - _LIBCPP_INLINE_VISIBILITY 
_LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator- (difference_type __n) const _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator- (difference_type __n) const _NOEXCEPT {return *this + (-__n);} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator-=(difference_type __n) _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator-=(difference_type __n) _NOEXCEPT {*this += -__n; return *this;} - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 reference operator[](difference_type __n) const _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n), "Attempted to subscript an iterator outside its valid range"); #endif return __i[__n]; } - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT {return __i;} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 iterator_type base() const _NOEXCEPT {return __i;} private: #if _LIBCPP_DEBUG_LEVEL == 2 - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(const void* __p, iterator_type __x) : __i(__x) { + if (!__libcpp_is_constant_evaluated()) __get_db()->__insert_ic(this, __p); } #else - _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {} #endif template <class _Up> friend class __wrap_iter; @@ -163,24 +172,25 @@ private: }; template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { return __x.base() == __y.base(); } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return __x.base() == __y.base(); } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)), "Attempted to compare incomparable iterators"); #endif @@ -188,10 +198,11 @@ bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _ } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y), "Attempted to compare incomparable iterators"); #endif @@ -199,63 +210,63 @@ bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _ } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator!=(const 
__wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { return !(__x == __y); } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__x == __y); } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { return __y < __x; } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return __y < __x; } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { return !(__x < __y); } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__x < __y); } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { return !(__y < __x); } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__y < __x); } template <class _Iter1, class _Iter2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 #ifndef _LIBCPP_CXX03_LANG auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT -> decltype(__x.base() - __y.base()) @@ -265,6 +276,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC #endif // C++03 { #if _LIBCPP_DEBUG_LEVEL == 2 + if (!__libcpp_is_constant_evaluated()) _LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)), "Attempted to subtract incompatible iterators"); #endif @@ -272,7 +284,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC } template <class _Iter1> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type __n, __wrap_iter<_Iter1> __x) _NOEXCEPT { __x += __n; diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h index cc32352ae11c..f4c8fa02d650 100644 --- a/libcxx/include/__memory/allocator_traits.h +++ b/libcxx/include/__memory/allocator_traits.h @@ -349,14 +349,6 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits } }; -// A version of `allocator_traits` for internal usage that SFINAEs away if the -// given allocator doesn't have a nested `value_type`. This helps avoid hard -// errors when forming implicit deduction guides for a container that has an -// invalid Allocator type. See https://wg21.link/LWGXXXXX. -// TODO(varconst): use the actual link once available. 
-template <class _Alloc, class _ValueType = typename _Alloc::value_type> -struct _LIBCPP_TEMPLATE_VIS __allocator_traits : allocator_traits<_Alloc> {}; - template <class _Traits, class _Tp> struct __rebind_alloc_helper { #ifndef _LIBCPP_CXX03_LANG diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h index 838960269c97..433120394269 100644 --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -174,17 +174,17 @@ public: template <bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy> > _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} + _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {} template <bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy> > _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} + _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {} template <bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy> > _LIBCPP_INLINE_VISIBILITY - explicit unique_ptr(pointer __p) _NOEXCEPT : __ptr_(__p, __default_init_tag()) {} + explicit unique_ptr(pointer __p) _NOEXCEPT : __ptr_(__p, __value_init_tag()) {} template <bool _Dummy = true, class = _EnableIfDeleterConstructible<_LValRefType<_Dummy> > > @@ -226,7 +226,7 @@ public: typename enable_if<is_convertible<_Up*, _Tp*>::value && is_same<_Dp, default_delete<_Tp> >::value, __nat>::type = __nat()) _NOEXCEPT - : __ptr_(__p.release(), __default_init_tag()) {} + : __ptr_(__p.release(), __value_init_tag()) {} #endif _LIBCPP_INLINE_VISIBILITY @@ -397,19 +397,19 @@ public: template <bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy> > _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} + _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {} template <bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy> > _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} + _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {} template <class _Pp, bool _Dummy = true, class = _EnableIfDeleterDefaultConstructible<_Dummy>, class = _EnableIfPointerConvertible<_Pp> > _LIBCPP_INLINE_VISIBILITY explicit unique_ptr(_Pp __p) _NOEXCEPT - : __ptr_(__p, __default_init_tag()) {} + : __ptr_(__p, __value_init_tag()) {} template <class _Pp, bool _Dummy = true, class = _EnableIfDeleterConstructible<_LValRefType<_Dummy> >, diff --git a/libcxx/include/__numeric/accumulate.h b/libcxx/include/__numeric/accumulate.h new file mode 100644 index 000000000000..fcdad58df141 --- /dev/null +++ b/libcxx/include/__numeric/accumulate.h @@ -0,0 +1,52 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_ACCUMULATE_H +#define _LIBCPP___NUMERIC_ACCUMULATE_H + +#include <__config> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _InputIterator, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_Tp +accumulate(_InputIterator __first, _InputIterator __last, _Tp __init) +{ + for (; __first != __last; ++__first) +#if _LIBCPP_STD_VER > 17 + __init = _VSTD::move(__init) + *__first; +#else + __init = __init + *__first; +#endif + return __init; +} + +template <class _InputIterator, class _Tp, class _BinaryOperation> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_Tp +accumulate(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op) +{ + for (; __first != __last; ++__first) +#if _LIBCPP_STD_VER > 17 + __init = __binary_op(_VSTD::move(__init), *__first); +#else + __init = __binary_op(__init, *__first); +#endif + return __init; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_ACCUMULATE_H diff --git a/libcxx/include/__numeric/adjacent_difference.h b/libcxx/include/__numeric/adjacent_difference.h new file mode 100644 index 000000000000..5c712ecdf77d --- /dev/null +++ b/libcxx/include/__numeric/adjacent_difference.h @@ -0,0 +1,72 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H +#define _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _InputIterator, class _OutputIterator> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + if (__first != __last) + { + typename iterator_traits<_InputIterator>::value_type __acc(*__first); + *__result = __acc; + for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) + { + typename iterator_traits<_InputIterator>::value_type __val(*__first); +#if _LIBCPP_STD_VER > 17 + *__result = __val - _VSTD::move(__acc); +#else + *__result = __val - __acc; +#endif + __acc = _VSTD::move(__val); + } + } + return __result; +} + +template <class _InputIterator, class _OutputIterator, class _BinaryOperation> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _BinaryOperation __binary_op) +{ + if (__first != __last) + { + typename iterator_traits<_InputIterator>::value_type __acc(*__first); + *__result = __acc; + for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) + { + typename iterator_traits<_InputIterator>::value_type __val(*__first); +#if _LIBCPP_STD_VER > 17 + *__result = __binary_op(__val, _VSTD::move(__acc)); +#else + 
*__result = __binary_op(__val, __acc); +#endif + __acc = _VSTD::move(__val); + } + } + return __result; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H diff --git a/libcxx/include/__numeric/exclusive_scan.h b/libcxx/include/__numeric/exclusive_scan.h new file mode 100644 index 000000000000..c0c89b38805d --- /dev/null +++ b/libcxx/include/__numeric/exclusive_scan.h @@ -0,0 +1,53 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H +#define _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H + +#include <__config> +#include <__functional/operations.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator +exclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Tp __init, _BinaryOp __b) { + if (__first != __last) { + _Tp __tmp(__b(__init, *__first)); + while (true) { + *__result = _VSTD::move(__init); + ++__result; + ++__first; + if (__first == __last) + break; + __init = _VSTD::move(__tmp); + __tmp = __b(__init, *__first); + } + } + return __result; +} + +template <class _InputIterator, class _OutputIterator, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator +exclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Tp __init) { + return _VSTD::exclusive_scan(__first, __last, __result, __init, _VSTD::plus<>()); +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h new file mode 100644 index 000000000000..34c0e533c928 --- /dev/null +++ b/libcxx/include/__numeric/gcd_lcm.h @@ -0,0 +1,96 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
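For reference on __numeric/adjacent_difference.h and __numeric/exclusive_scan.h (both completed just above): adjacent_difference writes the first element unchanged and then successive differences, while exclusive_scan writes the running fold shifted by one position, so the initial value appears first and the last input never contributes to any output. A small sketch; the concrete values are assumptions for illustration:

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());

        std::adjacent_difference(v.begin(), v.end(), out.begin()); // out = {1, 1, 1, 1}
        std::exclusive_scan(v.begin(), v.end(), out.begin(), 0);   // out = {0, 1, 3, 6} (C++17)
    }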
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_GCD_LCM_H +#define _LIBCPP___NUMERIC_GCD_LCM_H + +#include <__config> +#include <__debug> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value> struct __ct_abs; + +template <typename _Result, typename _Source> +struct __ct_abs<_Result, _Source, true> { + _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY + _Result operator()(_Source __t) const noexcept + { + if (__t >= 0) return __t; + if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t); + return -__t; + } +}; + +template <typename _Result, typename _Source> +struct __ct_abs<_Result, _Source, false> { + _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY + _Result operator()(_Source __t) const noexcept { return __t; } +}; + + +template<class _Tp> +_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN +_Tp __gcd(_Tp __m, _Tp __n) +{ + static_assert((!is_signed<_Tp>::value), ""); + return __n == 0 ? __m : _VSTD::__gcd<_Tp>(__n, __m % __n); +} + +template<class _Tp, class _Up> +_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY +common_type_t<_Tp,_Up> +gcd(_Tp __m, _Up __n) +{ + static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to gcd must be integer types"); + static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to gcd cannot be bool" ); + static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" ); + using _Rp = common_type_t<_Tp,_Up>; + using _Wp = make_unsigned_t<_Rp>; + return static_cast<_Rp>(_VSTD::__gcd( + static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)), + static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n)))); +} + +template<class _Tp, class _Up> +_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY +common_type_t<_Tp,_Up> +lcm(_Tp __m, _Up __n) +{ + static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to lcm must be integer types"); + static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to lcm cannot be bool" ); + static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to lcm cannot be bool" ); + if (__m == 0 || __n == 0) + return 0; + + using _Rp = common_type_t<_Tp,_Up>; + _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / _VSTD::gcd(__m, __n); + _Rp __val2 = __ct_abs<_Rp, _Up>()(__n); + _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm"); + return __val1 * __val2; +} + +#endif // _LIBCPP_STD_VER + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___NUMERIC_GCD_LCM_H diff --git a/libcxx/include/__numeric/inclusive_scan.h b/libcxx/include/__numeric/inclusive_scan.h new file mode 100644 index 000000000000..a6b005075835 --- /dev/null +++ b/libcxx/include/__numeric/inclusive_scan.h @@ -0,0 +1,60 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
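Usage note for __numeric/gcd_lcm.h (completed just above): both functions take the absolute values of their arguments (via __ct_abs), reject bool, and return the common type of the two operands; lcm additionally guards against overflow with _LIBCPP_ASSERT. A short constexpr sketch with assumed values:

    #include <numeric>

    static_assert(std::gcd(12, 18) == 6);
    static_assert(std::gcd(-12, 18) == 6);  // absolute values are used
    static_assert(std::lcm(4, 6) == 12);
    static_assert(std::lcm(0, 7) == 0);     // a zero operand yields zero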
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H +#define _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H + +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator +inclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _BinaryOp __b, _Tp __init) { + for (; __first != __last; ++__first, (void)++__result) { + __init = __b(__init, *__first); + *__result = __init; + } + return __result; +} + +template <class _InputIterator, class _OutputIterator, class _BinaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator +inclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _BinaryOp __b) { + if (__first != __last) { + typename iterator_traits<_InputIterator>::value_type __init = *__first; + *__result++ = __init; + if (++__first != __last) + return _VSTD::inclusive_scan(__first, __last, __result, __b, __init); + } + + return __result; +} + +template <class _InputIterator, class _OutputIterator> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator inclusive_scan(_InputIterator __first, + _InputIterator __last, + _OutputIterator __result) { + return _VSTD::inclusive_scan(__first, __last, __result, _VSTD::plus<>()); +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H diff --git a/libcxx/include/__numeric/inner_product.h b/libcxx/include/__numeric/inner_product.h new file mode 100644 index 000000000000..004acdde6a0c --- /dev/null +++ b/libcxx/include/__numeric/inner_product.h @@ -0,0 +1,53 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
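Usage note for __numeric/inclusive_scan.h (completed just above): unlike exclusive_scan, element i participates in output i, and the overload without an initial value seeds the fold from the first element, which is why it forwards to the init-taking overload after writing *__first. Sketch with assumed values:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        std::inclusive_scan(v.begin(), v.end(), out.begin());     // {1, 3, 6, 10}
        std::inclusive_scan(v.begin(), v.end(), out.begin(),
                            std::plus<>(), 100);                  // {101, 103, 106, 110}
    }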
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_INNER_PRODUCT_H +#define _LIBCPP___NUMERIC_INNER_PRODUCT_H + +#include <__config> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _InputIterator1, class _InputIterator2, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_Tp +inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _Tp __init) +{ + for (; __first1 != __last1; ++__first1, (void) ++__first2) +#if _LIBCPP_STD_VER > 17 + __init = _VSTD::move(__init) + *__first1 * *__first2; +#else + __init = __init + *__first1 * *__first2; +#endif + return __init; +} + +template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_Tp +inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, + _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +{ + for (; __first1 != __last1; ++__first1, (void) ++__first2) +#if _LIBCPP_STD_VER > 17 + __init = __binary_op1(_VSTD::move(__init), __binary_op2(*__first1, *__first2)); +#else + __init = __binary_op1(__init, __binary_op2(*__first1, *__first2)); +#endif + return __init; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_INNER_PRODUCT_H diff --git a/libcxx/include/__numeric/iota.h b/libcxx/include/__numeric/iota.h new file mode 100644 index 000000000000..b30e0e0a5484 --- /dev/null +++ b/libcxx/include/__numeric/iota.h @@ -0,0 +1,32 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_IOTA_H +#define _LIBCPP___NUMERIC_IOTA_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _ForwardIterator, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +void +iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_) +{ + for (; __first != __last; ++__first, (void) ++__value_) + *__first = __value_; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_IOTA_H diff --git a/libcxx/include/__numeric/midpoint.h b/libcxx/include/__numeric/midpoint.h new file mode 100644 index 000000000000..668030c46bcb --- /dev/null +++ b/libcxx/include/__numeric/midpoint.h @@ -0,0 +1,85 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
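For reference on __numeric/inner_product.h and __numeric/iota.h (both completed just above): iota fills a range with consecutively incremented values, and inner_product folds two ranges pairwise, by default as a dot product, or with user-supplied reduction and combination operations. Illustrative sketch with assumed values:

    #include <array>
    #include <cassert>
    #include <functional>
    #include <numeric>

    int main() {
        std::array<int, 4> a{};
        std::iota(a.begin(), a.end(), 1);                                   // a = {1, 2, 3, 4}
        std::array<int, 4> b{2, 2, 2, 2};

        assert(std::inner_product(a.begin(), a.end(), b.begin(), 0) == 20); // dot product
        // Generalized form: sum of pairwise maxima instead of products.
        int m = std::inner_product(a.begin(), a.end(), b.begin(), 0, std::plus<>(),
                                   [](int x, int y) { return x > y ? x : y; });
        assert(m == 11);
    }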
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_MIDPOINT_H +#define _LIBCPP___NUMERIC_MIDPOINT_H + +#include <__config> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 +template <class _Tp> +_LIBCPP_INLINE_VISIBILITY constexpr +enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp> +midpoint(_Tp __a, _Tp __b) noexcept +_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK +{ + using _Up = make_unsigned_t<_Tp>; + constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1; + + _Up __diff = _Up(__b) - _Up(__a); + _Up __sign_bit = __b < __a; + + _Up __half_diff = (__diff / 2) + (__sign_bit << __bitshift) + (__sign_bit & __diff); + + return __a + __half_diff; +} + + +template <class _TPtr> +_LIBCPP_INLINE_VISIBILITY constexpr +enable_if_t<is_pointer_v<_TPtr> + && is_object_v<remove_pointer_t<_TPtr>> + && ! is_void_v<remove_pointer_t<_TPtr>> + && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr> +midpoint(_TPtr __a, _TPtr __b) noexcept +{ + return __a + _VSTD::midpoint(ptrdiff_t(0), __b - __a); +} + + +template <typename _Tp> +constexpr int __sign(_Tp __val) { + return (_Tp(0) < __val) - (__val < _Tp(0)); +} + +template <typename _Fp> +constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; } + +template <class _Fp> +_LIBCPP_INLINE_VISIBILITY constexpr +enable_if_t<is_floating_point_v<_Fp>, _Fp> +midpoint(_Fp __a, _Fp __b) noexcept +{ + constexpr _Fp __lo = numeric_limits<_Fp>::min()*2; + constexpr _Fp __hi = numeric_limits<_Fp>::max()/2; + return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ? // typical case: overflow is impossible + (__a + __b)/2 : // always correctly rounded + __fp_abs(__a) < __lo ? __a + __b/2 : // not safe to halve a + __fp_abs(__b) < __lo ? __a/2 + __b : // not safe to halve b + __a/2 + __b/2; // otherwise correctly rounded +} + +#endif // _LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___NUMERIC_MIDPOINT_H diff --git a/libcxx/include/__numeric/partial_sum.h b/libcxx/include/__numeric/partial_sum.h new file mode 100644 index 000000000000..9acee3afc2b0 --- /dev/null +++ b/libcxx/include/__numeric/partial_sum.h @@ -0,0 +1,70 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
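Usage note for __numeric/midpoint.h (completed just above): the integer overload computes the midpoint in the corresponding unsigned type, so it cannot overflow even for extreme operands, and it rounds toward the first argument; the floating-point overload selects one of three formulas to stay correctly rounded near the limits of the type. Sketch with assumed values (C++20):

    #include <cassert>
    #include <limits>
    #include <numeric>

    int main() {
        constexpr int hi = std::numeric_limits<int>::max();
        static_assert(std::midpoint(hi, hi - 2) == hi - 1);  // no intermediate overflow
        static_assert(std::midpoint(3, 8) == 5);             // rounds toward the first argument
        static_assert(std::midpoint(8, 3) == 6);
        assert(std::midpoint(1.0, 2.0) == 1.5);
    }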
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_PARTIAL_SUM_H +#define _LIBCPP___NUMERIC_PARTIAL_SUM_H + +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _InputIterator, class _OutputIterator> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result) +{ + if (__first != __last) + { + typename iterator_traits<_InputIterator>::value_type __t(*__first); + *__result = __t; + for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) + { +#if _LIBCPP_STD_VER > 17 + __t = _VSTD::move(__t) + *__first; +#else + __t = __t + *__first; +#endif + *__result = __t; + } + } + return __result; +} + +template <class _InputIterator, class _OutputIterator, class _BinaryOperation> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _BinaryOperation __binary_op) +{ + if (__first != __last) + { + typename iterator_traits<_InputIterator>::value_type __t(*__first); + *__result = __t; + for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) + { +#if _LIBCPP_STD_VER > 17 + __t = __binary_op(_VSTD::move(__t), *__first); +#else + __t = __binary_op(__t, *__first); +#endif + *__result = __t; + } + } + return __result; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_PARTIAL_SUM_H diff --git a/libcxx/include/__numeric/reduce.h b/libcxx/include/__numeric/reduce.h new file mode 100644 index 000000000000..90e4d238d868 --- /dev/null +++ b/libcxx/include/__numeric/reduce.h @@ -0,0 +1,47 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
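A quick illustration of __numeric/partial_sum.h (completed just above): partial_sum is the classic counterpart of inclusive_scan without an initial value; with a custom operation it produces other running folds, such as running products. The values below are assumptions for illustration:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        std::partial_sum(v.begin(), v.end(), out.begin());                      // {1, 3, 6, 10}
        std::partial_sum(v.begin(), v.end(), out.begin(), std::multiplies<>()); // {1, 2, 6, 24}
    }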
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_REDUCE_H +#define _LIBCPP___NUMERIC_REDUCE_H + +#include <__config> +#include <__functional/operations.h> +#include <__iterator/iterator_traits.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 +template <class _InputIterator, class _Tp, class _BinaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp reduce(_InputIterator __first, _InputIterator __last, + _Tp __init, _BinaryOp __b) { + for (; __first != __last; ++__first) + __init = __b(__init, *__first); + return __init; +} + +template <class _InputIterator, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp reduce(_InputIterator __first, _InputIterator __last, + _Tp __init) { + return _VSTD::reduce(__first, __last, __init, _VSTD::plus<>()); +} + +template <class _InputIterator> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename iterator_traits<_InputIterator>::value_type +reduce(_InputIterator __first, _InputIterator __last) { + return _VSTD::reduce(__first, __last, typename iterator_traits<_InputIterator>::value_type{}); +} +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_REDUCE_H diff --git a/libcxx/include/__numeric/transform_exclusive_scan.h b/libcxx/include/__numeric/transform_exclusive_scan.h new file mode 100644 index 000000000000..45b3077f6649 --- /dev/null +++ b/libcxx/include/__numeric/transform_exclusive_scan.h @@ -0,0 +1,49 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H +#define _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template <class _InputIterator, class _OutputIterator, class _Tp, + class _BinaryOp, class _UnaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +transform_exclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _Tp __init, + _BinaryOp __b, _UnaryOp __u) +{ + if (__first != __last) + { + _Tp __saved = __init; + do + { + __init = __b(__init, __u(*__first)); + *__result = __saved; + __saved = __init; + ++__result; + } while (++__first != __last); + } + return __result; +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H diff --git a/libcxx/include/__numeric/transform_inclusive_scan.h b/libcxx/include/__numeric/transform_inclusive_scan.h new file mode 100644 index 000000000000..b0d4ab5a88fd --- /dev/null +++ b/libcxx/include/__numeric/transform_inclusive_scan.h @@ -0,0 +1,58 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
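For reference on __numeric/reduce.h and __numeric/transform_exclusive_scan.h (both completed just above): reduce is specified so the binary operation may be applied in any order and grouping, so it should be associative and commutative, unlike accumulate, which folds strictly left to right; transform_exclusive_scan applies a unary projection before the shifted fold. Sketch with assumed values:

    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        assert(std::reduce(v.begin(), v.end()) == 10);        // init is value_type{}, op is plus
        assert(std::reduce(v.begin(), v.end(), 100) == 110);

        std::vector<int> out(v.size());
        // Exclusive scan over squares: {0, 1, 5, 14}.
        std::transform_exclusive_scan(v.begin(), v.end(), out.begin(), 0,
                                      std::plus<>(), [](int x) { return x * x; });
    }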
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H +#define _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H + +#include <__config> +#include <__iterator/iterator_traits.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 + +template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp, class _UnaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +transform_inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init) +{ + for (; __first != __last; ++__first, (void) ++__result) { + __init = __b(__init, __u(*__first)); + *__result = __init; + } + + return __result; +} + +template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _UnaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 +_OutputIterator +transform_inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b, _UnaryOp __u) +{ + if (__first != __last) { + typename iterator_traits<_InputIterator>::value_type __init = __u(*__first); + *__result++ = __init; + if (++__first != __last) + return _VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init); + } + + return __result; +} + +#endif // _LIBCPP_STD_VER > 14 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H diff --git a/libcxx/include/__numeric/transform_reduce.h b/libcxx/include/__numeric/transform_reduce.h new file mode 100644 index 000000000000..da5a77988c38 --- /dev/null +++ b/libcxx/include/__numeric/transform_reduce.h @@ -0,0 +1,54 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
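Usage note for __numeric/transform_inclusive_scan.h (completed just above): the projected value of element i is included in output i, and, in contrast to transform_exclusive_scan, the optional initial value comes after the two operations in the parameter list. Short sketch with assumed values:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        // Running sum of squares: {1, 5, 14, 30}.
        std::transform_inclusive_scan(v.begin(), v.end(), out.begin(),
                                      std::plus<>(), [](int x) { return x * x; });
    }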
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H +#define _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H + +#include <__config> +#include <__functional/operations.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 14 +template <class _InputIterator, class _Tp, class _BinaryOp, class _UnaryOp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator __first, + _InputIterator __last, _Tp __init, + _BinaryOp __b, _UnaryOp __u) { + for (; __first != __last; ++__first) + __init = __b(__init, __u(*__first)); + return __init; +} + +template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOp1, class _BinaryOp2> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, _Tp __init, + _BinaryOp1 __b1, _BinaryOp2 __b2) { + for (; __first1 != __last1; ++__first1, (void)++__first2) + __init = __b1(__init, __b2(*__first1, *__first2)); + return __init; +} + +template <class _InputIterator1, class _InputIterator2, class _Tp> +_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator1 __first1, + _InputIterator1 __last1, + _InputIterator2 __first2, _Tp __init) { + return _VSTD::transform_reduce(__first1, __last1, __first2, _VSTD::move(__init), _VSTD::plus<>(), + _VSTD::multiplies<>()); +} +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H diff --git a/libcxx/include/__random/bernoulli_distribution.h b/libcxx/include/__random/bernoulli_distribution.h new file mode 100644 index 000000000000..60ae5eae7033 --- /dev/null +++ b/libcxx/include/__random/bernoulli_distribution.h @@ -0,0 +1,143 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
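Usage note for __numeric/transform_reduce.h (completed just above): the two-range overload defaults to plus/multiplies, i.e. a generalized dot product, and the one-range overload reduces a unary projection of each element. Sketch with assumed values:

    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<double> x{1.0, 2.0, 3.0};
        std::vector<double> w{0.5, 0.25, 0.25};
        // Weighted sum: 1*0.5 + 2*0.25 + 3*0.25 = 1.75.
        assert(std::transform_reduce(x.begin(), x.end(), w.begin(), 0.0) == 1.75);
        // Sum of squares: 1 + 4 + 9 = 14.
        assert(std::transform_reduce(x.begin(), x.end(), 0.0,
                                     std::plus<>(), [](double d) { return d * d; }) == 14.0);
    }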
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H +#define _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <iosfwd> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +class _LIBCPP_TEMPLATE_VIS bernoulli_distribution +{ +public: + // types + typedef bool result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + double __p_; + public: + typedef bernoulli_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(double __p = 0.5) : __p_(__p) {} + + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + bernoulli_distribution() : bernoulli_distribution(0.5) {} + _LIBCPP_INLINE_VISIBILITY + explicit bernoulli_distribution(double __p) : __p_(param_type(__p)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit bernoulli_distribution(double __p = 0.5) : __p_(param_type(__p)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit bernoulli_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_.p();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return false;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return true;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const bernoulli_distribution& __x, + const bernoulli_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const bernoulli_distribution& __x, + const bernoulli_distribution& __y) + {return !(__x == __y);} +}; + +template<class _URNG> +inline +bernoulli_distribution::result_type +bernoulli_distribution::operator()(_URNG& __g, const param_type& __p) +{ + uniform_real_distribution<double> __gen; + return __gen(__g) < __p.p(); +} + +template <class _CharT, class _Traits> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, const bernoulli_distribution& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + return __os << __x.p(); +} + +template <class _CharT, class _Traits> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, bernoulli_distribution& __x) +{ + typedef bernoulli_distribution _Eng; + typedef typename _Eng::param_type 
param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + double __p; + __is >> __p; + if (!__is.fail()) + __x.param(param_type(__p)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H diff --git a/libcxx/include/__random/binomial_distribution.h b/libcxx/include/__random/binomial_distribution.h new file mode 100644 index 000000000000..9662de8befd9 --- /dev/null +++ b/libcxx/include/__random/binomial_distribution.h @@ -0,0 +1,225 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _IntType = int> +class _LIBCPP_TEMPLATE_VIS binomial_distribution +{ +public: + // types + typedef _IntType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __t_; + double __p_; + double __pr_; + double __odds_ratio_; + result_type __r0_; + public: + typedef binomial_distribution distribution_type; + + explicit param_type(result_type __t = 1, double __p = 0.5); + + _LIBCPP_INLINE_VISIBILITY + result_type t() const {return __t_;} + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__t_ == __y.__t_ && __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + + friend class binomial_distribution; + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + binomial_distribution() : binomial_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit binomial_distribution(result_type __t, double __p = 0.5) + : __p_(param_type(__t, __p)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit binomial_distribution(result_type __t = 1, double __p = 0.5) + : __p_(param_type(__t, __p)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit binomial_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type t() const {return __p_.t();} + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_.p();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return t();} + + friend 
_LIBCPP_INLINE_VISIBILITY + bool operator==(const binomial_distribution& __x, + const binomial_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const binomial_distribution& __x, + const binomial_distribution& __y) + {return !(__x == __y);} +}; + +#ifndef _LIBCPP_MSVCRT_LIKE +extern "C" double lgamma_r(double, int *); +#endif + +inline _LIBCPP_INLINE_VISIBILITY double __libcpp_lgamma(double __d) { +#if defined(_LIBCPP_MSVCRT_LIKE) + return lgamma(__d); +#else + int __sign; + return lgamma_r(__d, &__sign); +#endif +} + +template<class _IntType> +binomial_distribution<_IntType>::param_type::param_type(result_type __t, double __p) + : __t_(__t), __p_(__p) +{ + if (0 < __p_ && __p_ < 1) + { + __r0_ = static_cast<result_type>((__t_ + 1) * __p_); + __pr_ = _VSTD::exp(__libcpp_lgamma(__t_ + 1.) - + __libcpp_lgamma(__r0_ + 1.) - + __libcpp_lgamma(__t_ - __r0_ + 1.) + __r0_ * _VSTD::log(__p_) + + (__t_ - __r0_) * _VSTD::log(1 - __p_)); + __odds_ratio_ = __p_ / (1 - __p_); + } +} + +// Reference: Kemp, C.D. (1986). `A modal method for generating binomial +// variables', Commun. Statist. - Theor. Meth. 15(3), 805-813. +template<class _IntType> +template<class _URNG> +_IntType +binomial_distribution<_IntType>::operator()(_URNG& __g, const param_type& __pr) +{ + if (__pr.__t_ == 0 || __pr.__p_ == 0) + return 0; + if (__pr.__p_ == 1) + return __pr.__t_; + uniform_real_distribution<double> __gen; + double __u = __gen(__g) - __pr.__pr_; + if (__u < 0) + return __pr.__r0_; + double __pu = __pr.__pr_; + double __pd = __pu; + result_type __ru = __pr.__r0_; + result_type __rd = __ru; + while (true) + { + bool __break = true; + if (__rd >= 1) + { + __pd *= __rd / (__pr.__odds_ratio_ * (__pr.__t_ - __rd + 1)); + __u -= __pd; + __break = false; + if (__u < 0) + return __rd - 1; + } + if ( __rd != 0 ) + --__rd; + ++__ru; + if (__ru <= __pr.__t_) + { + __pu *= (__pr.__t_ - __ru + 1) * __pr.__odds_ratio_ / __ru; + __u -= __pu; + __break = false; + if (__u < 0) + return __ru; + } + if (__break) + return 0; + } +} + +template <class _CharT, class _Traits, class _IntType> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const binomial_distribution<_IntType>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + return __os << __x.t() << __sp << __x.p(); +} + +template <class _CharT, class _Traits, class _IntType> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + binomial_distribution<_IntType>& __x) +{ + typedef binomial_distribution<_IntType> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __t; + double __p; + __is >> __t >> __p; + if (!__is.fail()) + __x.param(param_type(__t, __p)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/cauchy_distribution.h b/libcxx/include/__random/cauchy_distribution.h new file mode 100644 index 000000000000..6661e00bf939 --- /dev/null +++ b/libcxx/include/__random/cauchy_distribution.h @@ -0,0 +1,162 @@ 
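(Editorial aside, not part of the vendor diff: the two headers completed above carry the standard std::bernoulli_distribution and std::binomial_distribution implementations that were previously only available through the monolithic <random>. A minimal usage sketch of that standard API; std::mt19937 and the seed are arbitrary assumptions, any URNG works.)

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 gen(42);                           // assumed engine and seed
        std::bernoulli_distribution coin(0.25);         // p() == 0.25, operator() yields bool
        std::binomial_distribution<int> hits(10, 0.5);  // t() == 10 trials, p() == 0.5
        std::cout << coin(gen) << ' ' << hits(gen) << '\n';
    }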
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H +#define _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS cauchy_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __a_; + result_type __b_; + public: + typedef cauchy_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __a = 0, result_type __b = 1) + : __a_(__a), __b_(__b) {} + + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __a_;} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __b_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + cauchy_distribution() : cauchy_distribution(0) {} + _LIBCPP_INLINE_VISIBILITY + explicit cauchy_distribution(result_type __a, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit cauchy_distribution(result_type __a = 0, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit cauchy_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __p_.a();} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __p_.b();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return -numeric_limits<result_type>::infinity();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const cauchy_distribution& __x, + const cauchy_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const cauchy_distribution& __x, + const cauchy_distribution& __y) + {return !(__x == __y);} +}; + +template <class _RealType> +template<class _URNG> +inline +_RealType +cauchy_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + uniform_real_distribution<result_type> __gen; + // purposefully let tan arg get as close to 
pi/2 as it wants, tan will return a finite + return __p.a() + __p.b() * _VSTD::tan(3.1415926535897932384626433832795 * __gen(__g)); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const cauchy_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.a() << __sp << __x.b(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + cauchy_distribution<_RT>& __x) +{ + typedef cauchy_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __a; + result_type __b; + __is >> __a >> __b; + if (!__is.fail()) + __x.param(param_type(__a, __b)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H diff --git a/libcxx/include/__random/chi_squared_distribution.h b/libcxx/include/__random/chi_squared_distribution.h new file mode 100644 index 000000000000..9cf38971bdde --- /dev/null +++ b/libcxx/include/__random/chi_squared_distribution.h @@ -0,0 +1,144 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H +#define _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H + +#include <__config> +#include <__random/gamma_distribution.h> +#include <limits> +#include <iosfwd> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS chi_squared_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __n_; + public: + typedef chi_squared_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __n = 1) : __n_(__n) {} + + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __n_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__n_ == __y.__n_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + chi_squared_distribution() : chi_squared_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit chi_squared_distribution(result_type __n) + : __p_(param_type(__n)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit chi_squared_distribution(result_type __n = 1) + : __p_(param_type(__n)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit chi_squared_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g, const param_type& __p) + {return gamma_distribution<result_type>(__p.n() / 2, 2)(__g);} + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __p_.n();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const chi_squared_distribution& __x, + const chi_squared_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const chi_squared_distribution& __x, + const chi_squared_distribution& __y) + {return !(__x == __y);} +}; + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const chi_squared_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + __os << __x.n(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + chi_squared_distribution<_RT>& __x) +{ + typedef chi_squared_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; 
+ typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __n; + __is >> __n; + if (!__is.fail()) + __x.param(param_type(__n)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H diff --git a/libcxx/include/__random/default_random_engine.h b/libcxx/include/__random/default_random_engine.h new file mode 100644 index 000000000000..61c5cf9c7142 --- /dev/null +++ b/libcxx/include/__random/default_random_engine.h @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H +#define _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H + +#include <__config> +#include <__random/linear_congruential_engine.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +typedef minstd_rand default_random_engine; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H diff --git a/libcxx/include/__random/discard_block_engine.h b/libcxx/include/__random/discard_block_engine.h new file mode 100644 index 000000000000..335715211884 --- /dev/null +++ b/libcxx/include/__random/discard_block_engine.h @@ -0,0 +1,203 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
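(Editorial aside, not part of the vendor diff: as the chi_squared_distribution header above shows, each variate is produced by gamma_distribution<result_type>(n/2, 2), and default_random_engine is simply a typedef for minstd_rand. A short sketch of the same API from user code; the seed value is an arbitrary assumption.)

    #include <random>

    double chi_squared_draw(unsigned seed) {
        std::default_random_engine gen(seed);            // minstd_rand under the hood
        std::chi_squared_distribution<double> chi(4.0);  // n() == 4 degrees of freedom
        return chi(gen);                                 // internally a gamma(n/2, 2) draw
    }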
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H +#define _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H + +#include <__config> +#include <__random/is_seed_sequence.h> +#include <__utility/move.h> +#include <climits> +#include <iosfwd> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _Engine, size_t __p, size_t __r> +class _LIBCPP_TEMPLATE_VIS discard_block_engine +{ + _Engine __e_; + int __n_; + + static_assert( 0 < __r, "discard_block_engine invalid parameters"); + static_assert(__r <= __p, "discard_block_engine invalid parameters"); + static_assert(__r <= INT_MAX, "discard_block_engine invalid parameters"); +public: + // types + typedef typename _Engine::result_type result_type; + + // engine characteristics + static _LIBCPP_CONSTEXPR const size_t block_size = __p; + static _LIBCPP_CONSTEXPR const size_t used_block = __r; + +#ifdef _LIBCPP_CXX03_LANG + static const result_type _Min = _Engine::_Min; + static const result_type _Max = _Engine::_Max; +#else + static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min(); + static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max(); +#endif + + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); } + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); } + + // constructors and seeding functions + _LIBCPP_INLINE_VISIBILITY + discard_block_engine() : __n_(0) {} + _LIBCPP_INLINE_VISIBILITY + explicit discard_block_engine(const _Engine& __e) + : __e_(__e), __n_(0) {} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit discard_block_engine(_Engine&& __e) + : __e_(_VSTD::move(__e)), __n_(0) {} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit discard_block_engine(result_type __sd) : __e_(__sd), __n_(0) {} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit discard_block_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, discard_block_engine>::value && + !is_convertible<_Sseq, _Engine>::value>::type* = 0) + : __e_(__q), __n_(0) {} + _LIBCPP_INLINE_VISIBILITY + void seed() {__e_.seed(); __n_ = 0;} + _LIBCPP_INLINE_VISIBILITY + void seed(result_type __sd) {__e_.seed(__sd); __n_ = 0;} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, discard_block_engine>::value, + void + >::type + seed(_Sseq& __q) {__e_.seed(__q); __n_ = 0;} + + // generating functions + result_type operator()(); + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) operator()();} + + // property functions + _LIBCPP_INLINE_VISIBILITY + const _Engine& base() const _NOEXCEPT {return __e_;} + + template<class _Eng, size_t _Pp, size_t _Rp> + friend + bool + operator==( + const discard_block_engine<_Eng, _Pp, _Rp>& __x, + const discard_block_engine<_Eng, _Pp, _Rp>& __y); + + template<class _Eng, size_t _Pp, size_t _Rp> + friend + bool + operator!=( + const discard_block_engine<_Eng, _Pp, _Rp>& __x, + const discard_block_engine<_Eng, _Pp, _Rp>& __y); + + template <class _CharT, class _Traits, + class _Eng, size_t _Pp, size_t _Rp> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const 
discard_block_engine<_Eng, _Pp, _Rp>& __x); + + template <class _CharT, class _Traits, + class _Eng, size_t _Pp, size_t _Rp> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + discard_block_engine<_Eng, _Pp, _Rp>& __x); +}; + +template<class _Engine, size_t __p, size_t __r> + _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::block_size; + +template<class _Engine, size_t __p, size_t __r> + _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::used_block; + +template<class _Engine, size_t __p, size_t __r> +typename discard_block_engine<_Engine, __p, __r>::result_type +discard_block_engine<_Engine, __p, __r>::operator()() +{ + if (__n_ >= static_cast<int>(__r)) + { + __e_.discard(__p - __r); + __n_ = 0; + } + ++__n_; + return __e_(); +} + +template<class _Eng, size_t _Pp, size_t _Rp> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator==(const discard_block_engine<_Eng, _Pp, _Rp>& __x, + const discard_block_engine<_Eng, _Pp, _Rp>& __y) +{ + return __x.__n_ == __y.__n_ && __x.__e_ == __y.__e_; +} + +template<class _Eng, size_t _Pp, size_t _Rp> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=(const discard_block_engine<_Eng, _Pp, _Rp>& __x, + const discard_block_engine<_Eng, _Pp, _Rp>& __y) +{ + return !(__x == __y); +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Pp, size_t _Rp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const discard_block_engine<_Eng, _Pp, _Rp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _Ostream; + __os.flags(_Ostream::dec | _Ostream::left); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + return __os << __x.__e_ << __sp << __x.__n_; +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Pp, size_t _Rp> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + discard_block_engine<_Eng, _Pp, _Rp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + _Eng __e; + int __n; + __is >> __e >> __n; + if (!__is.fail()) + { + __x.__e_ = __e; + __x.__n_ = __n; + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H diff --git a/libcxx/include/__random/discrete_distribution.h b/libcxx/include/__random/discrete_distribution.h new file mode 100644 index 000000000000..dc9881a92c38 --- /dev/null +++ b/libcxx/include/__random/discrete_distribution.h @@ -0,0 +1,260 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H +#define _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H + +#include <__algorithm/upper_bound.h> +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <cstddef> +#include <iosfwd> +#include <numeric> +#include <vector> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _IntType = int> +class _LIBCPP_TEMPLATE_VIS discrete_distribution +{ +public: + // types + typedef _IntType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + vector<double> __p_; + public: + typedef discrete_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + param_type() {} + template<class _InputIterator> + _LIBCPP_INLINE_VISIBILITY + param_type(_InputIterator __f, _InputIterator __l) + : __p_(__f, __l) {__init();} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + param_type(initializer_list<double> __wl) + : __p_(__wl.begin(), __wl.end()) {__init();} +#endif // _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + param_type(size_t __nw, double __xmin, double __xmax, + _UnaryOperation __fw); + + vector<double> probabilities() const; + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + + private: + void __init(); + + friend class discrete_distribution; + + template <class _CharT, class _Traits, class _IT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const discrete_distribution<_IT>& __x); + + template <class _CharT, class _Traits, class _IT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + discrete_distribution<_IT>& __x); + }; + +private: + param_type __p_; + +public: + // constructor and reset functions + _LIBCPP_INLINE_VISIBILITY + discrete_distribution() {} + template<class _InputIterator> + _LIBCPP_INLINE_VISIBILITY + discrete_distribution(_InputIterator __f, _InputIterator __l) + : __p_(__f, __l) {} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + discrete_distribution(initializer_list<double> __wl) + : __p_(__wl) {} +#endif // _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + _LIBCPP_INLINE_VISIBILITY + discrete_distribution(size_t __nw, double __xmin, double __xmax, + _UnaryOperation __fw) + : __p_(__nw, __xmin, __xmax, __fw) {} + _LIBCPP_INLINE_VISIBILITY + explicit discrete_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + vector<double> probabilities() const {return __p_.probabilities();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return __p_.__p_.size();} + + 
friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const discrete_distribution& __x, + const discrete_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const discrete_distribution& __x, + const discrete_distribution& __y) + {return !(__x == __y);} + + template <class _CharT, class _Traits, class _IT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const discrete_distribution<_IT>& __x); + + template <class _CharT, class _Traits, class _IT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + discrete_distribution<_IT>& __x); +}; + +template<class _IntType> +template<class _UnaryOperation> +discrete_distribution<_IntType>::param_type::param_type(size_t __nw, + double __xmin, + double __xmax, + _UnaryOperation __fw) +{ + if (__nw > 1) + { + __p_.reserve(__nw - 1); + double __d = (__xmax - __xmin) / __nw; + double __d2 = __d / 2; + for (size_t __k = 0; __k < __nw; ++__k) + __p_.push_back(__fw(__xmin + __k * __d + __d2)); + __init(); + } +} + +template<class _IntType> +void +discrete_distribution<_IntType>::param_type::__init() +{ + if (!__p_.empty()) + { + if (__p_.size() > 1) + { + double __s = _VSTD::accumulate(__p_.begin(), __p_.end(), 0.0); + for (vector<double>::iterator __i = __p_.begin(), __e = __p_.end(); __i < __e; ++__i) + *__i /= __s; + vector<double> __t(__p_.size() - 1); + _VSTD::partial_sum(__p_.begin(), __p_.end() - 1, __t.begin()); + swap(__p_, __t); + } + else + { + __p_.clear(); + __p_.shrink_to_fit(); + } + } +} + +template<class _IntType> +vector<double> +discrete_distribution<_IntType>::param_type::probabilities() const +{ + size_t __n = __p_.size(); + vector<double> __p(__n+1); + _VSTD::adjacent_difference(__p_.begin(), __p_.end(), __p.begin()); + if (__n > 0) + __p[__n] = 1 - __p_[__n-1]; + else + __p[0] = 1; + return __p; +} + +template<class _IntType> +template<class _URNG> +_IntType +discrete_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p) +{ + uniform_real_distribution<double> __gen; + return static_cast<_IntType>( + _VSTD::upper_bound(__p.__p_.begin(), __p.__p_.end(), __gen(__g)) - + __p.__p_.begin()); +} + +template <class _CharT, class _Traits, class _IT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const discrete_distribution<_IT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + size_t __n = __x.__p_.__p_.size(); + __os << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__p_[__i]; + return __os; +} + +template <class _CharT, class _Traits, class _IT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + discrete_distribution<_IT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + size_t __n; + __is >> __n; + vector<double> __p(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __p[__i]; + if (!__is.fail()) + swap(__x.__p_.__p_, __p); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H diff --git a/libcxx/include/__random/exponential_distribution.h b/libcxx/include/__random/exponential_distribution.h new file mode 100644 index 
000000000000..9e555f0c1075 --- /dev/null +++ b/libcxx/include/__random/exponential_distribution.h @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H + +#include <__config> +#include <__random/generate_canonical.h> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS exponential_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __lambda_; + public: + typedef exponential_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {} + + _LIBCPP_INLINE_VISIBILITY + result_type lambda() const {return __lambda_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__lambda_ == __y.__lambda_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + exponential_distribution() : exponential_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit exponential_distribution(result_type __lambda) + : __p_(param_type(__lambda)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit exponential_distribution(result_type __lambda = 1) + : __p_(param_type(__lambda)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit exponential_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type lambda() const {return __p_.lambda();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const exponential_distribution& __x, + const exponential_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const exponential_distribution& __x, + const exponential_distribution& __y) + {return !(__x == __y);} +}; + +template <class _RealType> +template<class _URNG> +_RealType +exponential_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + return -_VSTD::log + ( + result_type(1) - + _VSTD::generate_canonical<result_type, + numeric_limits<result_type>::digits>(__g) + ) + / 
__p.lambda(); +} + +template <class _CharT, class _Traits, class _RealType> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const exponential_distribution<_RealType>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + return __os << __x.lambda(); +} + +template <class _CharT, class _Traits, class _RealType> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + exponential_distribution<_RealType>& __x) +{ + typedef exponential_distribution<_RealType> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __lambda; + __is >> __lambda; + if (!__is.fail()) + __x.param(param_type(__lambda)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/extreme_value_distribution.h b/libcxx/include/__random/extreme_value_distribution.h new file mode 100644 index 000000000000..0e200f91d7ff --- /dev/null +++ b/libcxx/include/__random/extreme_value_distribution.h @@ -0,0 +1,161 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H +#define _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS extreme_value_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __a_; + result_type __b_; + public: + typedef extreme_value_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __a = 0, result_type __b = 1) + : __a_(__a), __b_(__b) {} + + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __a_;} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __b_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + extreme_value_distribution() : extreme_value_distribution(0) {} + _LIBCPP_INLINE_VISIBILITY + explicit extreme_value_distribution(result_type __a, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit extreme_value_distribution(result_type __a = 0, + result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#endif + 
_LIBCPP_INLINE_VISIBILITY + explicit extreme_value_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __p_.a();} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __p_.b();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return -numeric_limits<result_type>::infinity();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const extreme_value_distribution& __x, + const extreme_value_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const extreme_value_distribution& __x, + const extreme_value_distribution& __y) + {return !(__x == __y);} +}; + +template<class _RealType> +template<class _URNG> +_RealType +extreme_value_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + return __p.a() - __p.b() * + _VSTD::log(-_VSTD::log(1-uniform_real_distribution<result_type>()(__g))); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const extreme_value_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.a() << __sp << __x.b(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + extreme_value_distribution<_RT>& __x) +{ + typedef extreme_value_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __a; + result_type __b; + __is >> __a >> __b; + if (!__is.fail()) + __x.param(param_type(__a, __b)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H diff --git a/libcxx/include/__random/fisher_f_distribution.h b/libcxx/include/__random/fisher_f_distribution.h new file mode 100644 index 000000000000..bf64d33a645a --- /dev/null +++ b/libcxx/include/__random/fisher_f_distribution.h @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H +#define _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H + +#include <__config> +#include <__random/gamma_distribution.h> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS fisher_f_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __m_; + result_type __n_; + public: + typedef fisher_f_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __m = 1, result_type __n = 1) + : __m_(__m), __n_(__n) {} + + _LIBCPP_INLINE_VISIBILITY + result_type m() const {return __m_;} + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __n_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__m_ == __y.__m_ && __x.__n_ == __y.__n_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + fisher_f_distribution() : fisher_f_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit fisher_f_distribution(result_type __m, result_type __n = 1) + : __p_(param_type(__m, __n)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit fisher_f_distribution(result_type __m = 1, result_type __n = 1) + : __p_(param_type(__m, __n)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit fisher_f_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type m() const {return __p_.m();} + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __p_.n();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const fisher_f_distribution& __x, + const fisher_f_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const fisher_f_distribution& __x, + const fisher_f_distribution& __y) + {return !(__x == __y);} +}; + +template <class _RealType> +template<class _URNG> +_RealType +fisher_f_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + gamma_distribution<result_type> __gdm(__p.m() * result_type(.5)); + gamma_distribution<result_type> __gdn(__p.n() * result_type(.5)); + return __p.n() * __gdm(__g) / (__p.m() * __gdn(__g)); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const fisher_f_distribution<_RT>& __x) +{ + 
__save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.m() << __sp << __x.n(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + fisher_f_distribution<_RT>& __x) +{ + typedef fisher_f_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __m; + result_type __n; + __is >> __m >> __n; + if (!__is.fail()) + __x.param(param_type(__m, __n)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H diff --git a/libcxx/include/__random/gamma_distribution.h b/libcxx/include/__random/gamma_distribution.h new file mode 100644 index 000000000000..49d024eafea2 --- /dev/null +++ b/libcxx/include/__random/gamma_distribution.h @@ -0,0 +1,213 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H +#define _LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <__random/exponential_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS gamma_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __alpha_; + result_type __beta_; + public: + typedef gamma_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __alpha = 1, result_type __beta = 1) + : __alpha_(__alpha), __beta_(__beta) {} + + _LIBCPP_INLINE_VISIBILITY + result_type alpha() const {return __alpha_;} + _LIBCPP_INLINE_VISIBILITY + result_type beta() const {return __beta_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__alpha_ == __y.__alpha_ && __x.__beta_ == __y.__beta_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + gamma_distribution() : gamma_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit gamma_distribution(result_type __alpha, result_type __beta = 1) + : __p_(param_type(__alpha, __beta)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit gamma_distribution(result_type __alpha = 1, + result_type __beta = 1) + : __p_(param_type(__alpha, __beta)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit gamma_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + 
void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type alpha() const {return __p_.alpha();} + _LIBCPP_INLINE_VISIBILITY + result_type beta() const {return __p_.beta();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const gamma_distribution& __x, + const gamma_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const gamma_distribution& __x, + const gamma_distribution& __y) + {return !(__x == __y);} +}; + +template <class _RealType> +template<class _URNG> +_RealType +gamma_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + result_type __a = __p.alpha(); + uniform_real_distribution<result_type> __gen(0, 1); + exponential_distribution<result_type> __egen; + result_type __x; + if (__a == 1) + __x = __egen(__g); + else if (__a > 1) + { + const result_type __b = __a - 1; + const result_type __c = 3 * __a - result_type(0.75); + while (true) + { + const result_type __u = __gen(__g); + const result_type __v = __gen(__g); + const result_type __w = __u * (1 - __u); + if (__w != 0) + { + const result_type __y = _VSTD::sqrt(__c / __w) * + (__u - result_type(0.5)); + __x = __b + __y; + if (__x >= 0) + { + const result_type __z = 64 * __w * __w * __w * __v * __v; + if (__z <= 1 - 2 * __y * __y / __x) + break; + if (_VSTD::log(__z) <= 2 * (__b * _VSTD::log(__x / __b) - __y)) + break; + } + } + } + } + else // __a < 1 + { + while (true) + { + const result_type __u = __gen(__g); + const result_type __es = __egen(__g); + if (__u <= 1 - __a) + { + __x = _VSTD::pow(__u, 1 / __a); + if (__x <= __es) + break; + } + else + { + const result_type __e = -_VSTD::log((1-__u)/__a); + __x = _VSTD::pow(1 - __a + __a * __e, 1 / __a); + if (__x <= __e + __es) + break; + } + } + } + return __x * __p.beta(); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const gamma_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.alpha() << __sp << __x.beta(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + gamma_distribution<_RT>& __x) +{ + typedef gamma_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __alpha; + result_type __beta; + __is >> __alpha >> __beta; + if (!__is.fail()) + __x.param(param_type(__alpha, __beta)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // 
_LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H diff --git a/libcxx/include/__random/generate_canonical.h b/libcxx/include/__random/generate_canonical.h new file mode 100644 index 000000000000..46c3b2980952 --- /dev/null +++ b/libcxx/include/__random/generate_canonical.h @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_GENERATE_CANONICAL_H +#define _LIBCPP___RANDOM_GENERATE_CANONICAL_H + +#include <__config> +#include <__random/log2.h> +#include <cstdint> +#include <initializer_list> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +// generate_canonical + +template<class _RealType, size_t __bits, class _URNG> +_RealType +generate_canonical(_URNG& __g) +{ + const size_t _Dt = numeric_limits<_RealType>::digits; + const size_t __b = _Dt < __bits ? _Dt : __bits; +#ifdef _LIBCPP_CXX03_LANG + const size_t __logR = __log2<uint64_t, _URNG::_Max - _URNG::_Min + uint64_t(1)>::value; +#else + const size_t __logR = __log2<uint64_t, _URNG::max() - _URNG::min() + uint64_t(1)>::value; +#endif + const size_t __k = __b / __logR + (__b % __logR != 0) + (__b == 0); + const _RealType _Rp = static_cast<_RealType>(_URNG::max() - _URNG::min()) + _RealType(1); + _RealType __base = _Rp; + _RealType _Sp = __g() - _URNG::min(); + for (size_t __i = 1; __i < __k; ++__i, __base *= _Rp) + _Sp += (__g() - _URNG::min()) * __base; + return _Sp / __base; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_GENERATE_CANONICAL_H diff --git a/libcxx/include/__random/geometric_distribution.h b/libcxx/include/__random/geometric_distribution.h new file mode 100644 index 000000000000..174914eaed2e --- /dev/null +++ b/libcxx/include/__random/geometric_distribution.h @@ -0,0 +1,141 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
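(Editorial aside, not part of the vendor diff: generate_canonical, defined just above, maps raw engine output to a floating-point value in [0, 1) using at least the requested number of bits; exponential_distribution earlier in this diff calls it directly. A hedged sketch of the standard call; std::mt19937 is an assumed engine.)

    #include <random>

    double canonical(std::mt19937& gen) {
        // Request 53 bits, enough to fill a double's mantissa.
        return std::generate_canonical<double, 53>(gen);
    }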
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H +#define _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H + +#include <__config> +#include <__random/negative_binomial_distribution.h> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _IntType = int> +class _LIBCPP_TEMPLATE_VIS geometric_distribution +{ +public: + // types + typedef _IntType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + double __p_; + public: + typedef geometric_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(double __p = 0.5) : __p_(__p) {} + + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + geometric_distribution() : geometric_distribution(0.5) {} + _LIBCPP_INLINE_VISIBILITY + explicit geometric_distribution(double __p) + : __p_(__p) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit geometric_distribution(double __p = 0.5) + : __p_(__p) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit geometric_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g, const param_type& __p) + {return negative_binomial_distribution<result_type>(1, __p.p())(__g);} + + // property functions + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_.p();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::max();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const geometric_distribution& __x, + const geometric_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const geometric_distribution& __x, + const geometric_distribution& __y) + {return !(__x == __y);} +}; + +template <class _CharT, class _Traits, class _IntType> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const geometric_distribution<_IntType>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + return __os << __x.p(); +} + +template <class _CharT, class _Traits, class _IntType> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + geometric_distribution<_IntType>& __x) +{ + typedef geometric_distribution<_IntType> _Eng; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef 
basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + double __p; + __is >> __p; + if (!__is.fail()) + __x.param(param_type(__p)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H diff --git a/libcxx/include/__random/independent_bits_engine.h b/libcxx/include/__random/independent_bits_engine.h new file mode 100644 index 000000000000..f0e8c654246b --- /dev/null +++ b/libcxx/include/__random/independent_bits_engine.h @@ -0,0 +1,271 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H +#define _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H + +#include <__config> +#include <__random/is_seed_sequence.h> +#include <__random/log2.h> +#include <__utility/move.h> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _Engine, size_t __w, class _UIntType> +class _LIBCPP_TEMPLATE_VIS independent_bits_engine +{ + template <class _UInt, _UInt _R0, size_t _Wp, size_t _Mp> + class __get_n + { + static _LIBCPP_CONSTEXPR const size_t _Dt = numeric_limits<_UInt>::digits; + static _LIBCPP_CONSTEXPR const size_t _Np = _Wp / _Mp + (_Wp % _Mp != 0); + static _LIBCPP_CONSTEXPR const size_t _W0 = _Wp / _Np; + static _LIBCPP_CONSTEXPR const _UInt _Y0 = _W0 >= _Dt ? 0 : (_R0 >> _W0) << _W0; + public: + static _LIBCPP_CONSTEXPR const size_t value = _R0 - _Y0 > _Y0 / _Np ? _Np + 1 : _Np; + }; +public: + // types + typedef _UIntType result_type; + +private: + _Engine __e_; + + static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; + static_assert( 0 < __w, "independent_bits_engine invalid parameters"); + static_assert(__w <= _Dt, "independent_bits_engine invalid parameters"); + + typedef typename _Engine::result_type _Engine_result_type; + typedef typename conditional + < + sizeof(_Engine_result_type) <= sizeof(result_type), + result_type, + _Engine_result_type + >::type _Working_result_type; +#ifdef _LIBCPP_CXX03_LANG + static const _Working_result_type _Rp = _Engine::_Max - _Engine::_Min + + _Working_result_type(1); +#else + static _LIBCPP_CONSTEXPR const _Working_result_type _Rp = _Engine::max() - _Engine::min() + + _Working_result_type(1); +#endif + static _LIBCPP_CONSTEXPR const size_t __m = __log2<_Working_result_type, _Rp>::value; + static _LIBCPP_CONSTEXPR const size_t __n = __get_n<_Working_result_type, _Rp, __w, __m>::value; + static _LIBCPP_CONSTEXPR const size_t __w0 = __w / __n; + static _LIBCPP_CONSTEXPR const size_t __n0 = __n - __w % __n; + static _LIBCPP_CONSTEXPR const size_t _WDt = numeric_limits<_Working_result_type>::digits; + static _LIBCPP_CONSTEXPR const size_t _EDt = numeric_limits<_Engine_result_type>::digits; + static _LIBCPP_CONSTEXPR const _Working_result_type __y0 = __w0 >= _WDt ? 0 : + (_Rp >> __w0) << __w0; + static _LIBCPP_CONSTEXPR const _Working_result_type __y1 = __w0 >= _WDt - 1 ? 
0 : + (_Rp >> (__w0+1)) << (__w0+1); + static _LIBCPP_CONSTEXPR const _Engine_result_type __mask0 = __w0 > 0 ? + _Engine_result_type(~0) >> (_EDt - __w0) : + _Engine_result_type(0); + static _LIBCPP_CONSTEXPR const _Engine_result_type __mask1 = __w0 < _EDt - 1 ? + _Engine_result_type(~0) >> (_EDt - (__w0 + 1)) : + _Engine_result_type(~0); +public: + static _LIBCPP_CONSTEXPR const result_type _Min = 0; + static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) : + (result_type(1) << __w) - result_type(1); + static_assert(_Min < _Max, "independent_bits_engine invalid parameters"); + + // engine characteristics + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + + // constructors and seeding functions + _LIBCPP_INLINE_VISIBILITY + independent_bits_engine() {} + _LIBCPP_INLINE_VISIBILITY + explicit independent_bits_engine(const _Engine& __e) + : __e_(__e) {} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit independent_bits_engine(_Engine&& __e) + : __e_(_VSTD::move(__e)) {} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit independent_bits_engine(result_type __sd) : __e_(__sd) {} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit independent_bits_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, independent_bits_engine>::value && + !is_convertible<_Sseq, _Engine>::value>::type* = 0) + : __e_(__q) {} + _LIBCPP_INLINE_VISIBILITY + void seed() {__e_.seed();} + _LIBCPP_INLINE_VISIBILITY + void seed(result_type __sd) {__e_.seed(__sd);} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, independent_bits_engine>::value, + void + >::type + seed(_Sseq& __q) {__e_.seed(__q);} + + // generating functions + _LIBCPP_INLINE_VISIBILITY + result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());} + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) operator()();} + + // property functions + _LIBCPP_INLINE_VISIBILITY + const _Engine& base() const _NOEXCEPT {return __e_;} + + template<class _Eng, size_t _Wp, class _UInt> + friend + bool + operator==( + const independent_bits_engine<_Eng, _Wp, _UInt>& __x, + const independent_bits_engine<_Eng, _Wp, _UInt>& __y); + + template<class _Eng, size_t _Wp, class _UInt> + friend + bool + operator!=( + const independent_bits_engine<_Eng, _Wp, _UInt>& __x, + const independent_bits_engine<_Eng, _Wp, _UInt>& __y); + + template <class _CharT, class _Traits, + class _Eng, size_t _Wp, class _UInt> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const independent_bits_engine<_Eng, _Wp, _UInt>& __x); + + template <class _CharT, class _Traits, + class _Eng, size_t _Wp, class _UInt> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + independent_bits_engine<_Eng, _Wp, _UInt>& __x); + +private: + _LIBCPP_INLINE_VISIBILITY + result_type __eval(false_type); + result_type __eval(true_type); + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + __count < _Dt, + result_type + >::type + __lshift(result_type __x) {return __x << __count;} + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + (__count >= _Dt), + result_type + >::type + __lshift(result_type) {return result_type(0);} +}; + +template<class 
_Engine, size_t __w, class _UIntType> +inline +_UIntType +independent_bits_engine<_Engine, __w, _UIntType>::__eval(false_type) +{ + return static_cast<result_type>(__e_() & __mask0); +} + +template<class _Engine, size_t __w, class _UIntType> +_UIntType +independent_bits_engine<_Engine, __w, _UIntType>::__eval(true_type) +{ + result_type _Sp = 0; + for (size_t __k = 0; __k < __n0; ++__k) + { + _Engine_result_type __u; + do + { + __u = __e_() - _Engine::min(); + } while (__u >= __y0); + _Sp = static_cast<result_type>(__lshift<__w0>(_Sp) + (__u & __mask0)); + } + for (size_t __k = __n0; __k < __n; ++__k) + { + _Engine_result_type __u; + do + { + __u = __e_() - _Engine::min(); + } while (__u >= __y1); + _Sp = static_cast<result_type>(__lshift<__w0+1>(_Sp) + (__u & __mask1)); + } + return _Sp; +} + +template<class _Eng, size_t _Wp, class _UInt> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator==( + const independent_bits_engine<_Eng, _Wp, _UInt>& __x, + const independent_bits_engine<_Eng, _Wp, _UInt>& __y) +{ + return __x.base() == __y.base(); +} + +template<class _Eng, size_t _Wp, class _UInt> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=( + const independent_bits_engine<_Eng, _Wp, _UInt>& __x, + const independent_bits_engine<_Eng, _Wp, _UInt>& __y) +{ + return !(__x == __y); +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Wp, class _UInt> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const independent_bits_engine<_Eng, _Wp, _UInt>& __x) +{ + return __os << __x.base(); +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Wp, class _UInt> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + independent_bits_engine<_Eng, _Wp, _UInt>& __x) +{ + _Eng __e; + __is >> __e; + if (!__is.fail()) + __x.__e_ = __e; + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H diff --git a/libcxx/include/__random/is_seed_sequence.h b/libcxx/include/__random/is_seed_sequence.h new file mode 100644 index 000000000000..46b1d719ddfb --- /dev/null +++ b/libcxx/include/__random/is_seed_sequence.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H +#define _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H + +#include <__config> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _Sseq, class _Engine> +struct __is_seed_sequence +{ + static _LIBCPP_CONSTEXPR const bool value = + !is_convertible<_Sseq, typename _Engine::result_type>::value && + !is_same<typename remove_cv<_Sseq>::type, _Engine>::value; +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H diff --git a/libcxx/include/__random/knuth_b.h b/libcxx/include/__random/knuth_b.h new file mode 100644 index 000000000000..ade853884dd3 --- /dev/null +++ b/libcxx/include/__random/knuth_b.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_KNUTH_B_H +#define _LIBCPP___RANDOM_KNUTH_B_H + +#include <__config> +#include <__random/linear_congruential_engine.h> +#include <__random/shuffle_order_engine.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +typedef shuffle_order_engine<minstd_rand0, 256> knuth_b; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANDOM_KNUTH_B_H diff --git a/libcxx/include/__random/linear_congruential_engine.h b/libcxx/include/__random/linear_congruential_engine.h new file mode 100644 index 000000000000..64c9f584114c --- /dev/null +++ b/libcxx/include/__random/linear_congruential_engine.h @@ -0,0 +1,398 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H +#define _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H + +#include <__config> +#include <__random/is_seed_sequence.h> +#include <cstdint> +#include <iosfwd> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <unsigned long long __a, unsigned long long __c, + unsigned long long __m, unsigned long long _Mp, + bool _MightOverflow = (__a != 0 && __m != 0 && __m-1 > (_Mp-__c)/__a), + bool _OverflowOK = ((__m | (__m-1)) > __m), // m = 2^n + bool _SchrageOK = (__a != 0 && __m != 0 && __m % __a <= __m / __a)> // r <= q +struct __lce_alg_picker +{ + static_assert(__a != 0 || __m != 0 || !_MightOverflow || _OverflowOK || _SchrageOK, + "The current values of a, c, and m cannot generate a number " + "within bounds of linear_congruential_engine."); + + static _LIBCPP_CONSTEXPR const bool __use_schrage = _MightOverflow && + !_OverflowOK && + _SchrageOK; +}; + +template <unsigned long long __a, unsigned long long __c, + unsigned long long __m, unsigned long long _Mp, + bool _UseSchrage = __lce_alg_picker<__a, __c, __m, _Mp>::__use_schrage> +struct __lce_ta; + +// 64 + +template <unsigned long long __a, unsigned long long __c, unsigned long long __m> +struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), true> +{ + typedef unsigned long long result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + // Schrage's algorithm + const result_type __q = __m / __a; + const result_type __r = __m % __a; + const result_type __t0 = __a * (__x % __q); + const result_type __t1 = __r * (__x / __q); + __x = __t0 + (__t0 < __t1) * __m - __t1; + __x += __c - (__x >= __m - __c) * __m; + return __x; + } +}; + +template <unsigned long long __a, unsigned long long __m> +struct __lce_ta<__a, 0, __m, (unsigned long long)(~0), true> +{ + typedef unsigned long long result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + // Schrage's algorithm + const result_type __q = __m / __a; + const result_type __r = __m % __a; + const result_type __t0 = __a * (__x % __q); + const result_type __t1 = __r * 
(__x / __q); + __x = __t0 + (__t0 < __t1) * __m - __t1; + return __x; + } +}; + +template <unsigned long long __a, unsigned long long __c, unsigned long long __m> +struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), false> +{ + typedef unsigned long long result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + return (__a * __x + __c) % __m; + } +}; + +template <unsigned long long __a, unsigned long long __c> +struct __lce_ta<__a, __c, 0, (unsigned long long)(~0), false> +{ + typedef unsigned long long result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + return __a * __x + __c; + } +}; + +// 32 + +template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp> +struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), true> +{ + typedef unsigned result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + const result_type __a = static_cast<result_type>(_Ap); + const result_type __c = static_cast<result_type>(_Cp); + const result_type __m = static_cast<result_type>(_Mp); + // Schrage's algorithm + const result_type __q = __m / __a; + const result_type __r = __m % __a; + const result_type __t0 = __a * (__x % __q); + const result_type __t1 = __r * (__x / __q); + __x = __t0 + (__t0 < __t1) * __m - __t1; + __x += __c - (__x >= __m - __c) * __m; + return __x; + } +}; + +template <unsigned long long _Ap, unsigned long long _Mp> +struct __lce_ta<_Ap, 0, _Mp, unsigned(~0), true> +{ + typedef unsigned result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + const result_type __a = static_cast<result_type>(_Ap); + const result_type __m = static_cast<result_type>(_Mp); + // Schrage's algorithm + const result_type __q = __m / __a; + const result_type __r = __m % __a; + const result_type __t0 = __a * (__x % __q); + const result_type __t1 = __r * (__x / __q); + __x = __t0 + (__t0 < __t1) * __m - __t1; + return __x; + } +}; + +template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp> +struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), false> +{ + typedef unsigned result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + const result_type __a = static_cast<result_type>(_Ap); + const result_type __c = static_cast<result_type>(_Cp); + const result_type __m = static_cast<result_type>(_Mp); + return (__a * __x + __c) % __m; + } +}; + +template <unsigned long long _Ap, unsigned long long _Cp> +struct __lce_ta<_Ap, _Cp, 0, unsigned(~0), false> +{ + typedef unsigned result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + const result_type __a = static_cast<result_type>(_Ap); + const result_type __c = static_cast<result_type>(_Cp); + return __a * __x + __c; + } +}; + +// 16 + +template <unsigned long long __a, unsigned long long __c, unsigned long long __m, bool __b> +struct __lce_ta<__a, __c, __m, (unsigned short)(~0), __b> +{ + typedef unsigned short result_type; + _LIBCPP_INLINE_VISIBILITY + static result_type next(result_type __x) + { + return static_cast<result_type>(__lce_ta<__a, __c, __m, unsigned(~0)>::next(__x)); + } +}; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +class _LIBCPP_TEMPLATE_VIS linear_congruential_engine; + +template <class _CharT, class _Traits, + class _Up, _Up _Ap, _Up _Cp, _Up _Np> +_LIBCPP_INLINE_VISIBILITY +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const linear_congruential_engine<_Up, _Ap, 
_Cp, _Np>&); + +template <class _CharT, class _Traits, + class _Up, _Up _Ap, _Up _Cp, _Up _Np> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x); + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +class _LIBCPP_TEMPLATE_VIS linear_congruential_engine +{ +public: + // types + typedef _UIntType result_type; + +private: + result_type __x_; + + static _LIBCPP_CONSTEXPR const result_type _Mp = result_type(~0); + + static_assert(__m == 0 || __a < __m, "linear_congruential_engine invalid parameters"); + static_assert(__m == 0 || __c < __m, "linear_congruential_engine invalid parameters"); + static_assert(is_unsigned<_UIntType>::value, "_UIntType must be unsigned type"); +public: + static _LIBCPP_CONSTEXPR const result_type _Min = __c == 0u ? 1u: 0u; + static _LIBCPP_CONSTEXPR const result_type _Max = __m - 1u; + static_assert(_Min < _Max, "linear_congruential_engine invalid parameters"); + + // engine characteristics + static _LIBCPP_CONSTEXPR const result_type multiplier = __a; + static _LIBCPP_CONSTEXPR const result_type increment = __c; + static _LIBCPP_CONSTEXPR const result_type modulus = __m; + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() {return _Min;} + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() {return _Max;} + static _LIBCPP_CONSTEXPR const result_type default_seed = 1u; + + // constructors and seeding functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + linear_congruential_engine() : linear_congruential_engine(default_seed) {} + _LIBCPP_INLINE_VISIBILITY + explicit linear_congruential_engine(result_type __s) { seed(__s); } +#else + _LIBCPP_INLINE_VISIBILITY + explicit linear_congruential_engine(result_type __s = default_seed) { + seed(__s); + } +#endif + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit linear_congruential_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, linear_congruential_engine>::value>::type* = 0) + {seed(__q);} + _LIBCPP_INLINE_VISIBILITY + void seed(result_type __s = default_seed) + {seed(integral_constant<bool, __m == 0>(), + integral_constant<bool, __c == 0>(), __s);} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, linear_congruential_engine>::value, + void + >::type + seed(_Sseq& __q) + {__seed(__q, integral_constant<unsigned, + 1 + (__m == 0 ? (sizeof(result_type) * __CHAR_BIT__ - 1)/32 + : (__m > 0x100000000ull))>());} + + // generating functions + _LIBCPP_INLINE_VISIBILITY + result_type operator()() + {return __x_ = static_cast<result_type>(__lce_ta<__a, __c, __m, _Mp>::next(__x_));} + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) operator()();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const linear_congruential_engine& __x, + const linear_congruential_engine& __y) + {return __x.__x_ == __y.__x_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const linear_congruential_engine& __x, + const linear_congruential_engine& __y) + {return !(__x == __y);} + +private: + + _LIBCPP_INLINE_VISIBILITY + void seed(true_type, true_type, result_type __s) {__x_ = __s == 0 ? 1 : __s;} + _LIBCPP_INLINE_VISIBILITY + void seed(true_type, false_type, result_type __s) {__x_ = __s;} + _LIBCPP_INLINE_VISIBILITY + void seed(false_type, true_type, result_type __s) {__x_ = __s % __m == 0 ? 
+ 1 : __s % __m;} + _LIBCPP_INLINE_VISIBILITY + void seed(false_type, false_type, result_type __s) {__x_ = __s % __m;} + + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 1>); + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 2>); + + template <class _CharT, class _Traits, + class _Up, _Up _Ap, _Up _Cp, _Up _Np> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&); + + template <class _CharT, class _Traits, + class _Up, _Up _Ap, _Up _Cp, _Up _Np> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x); +}; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> + _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type + linear_congruential_engine<_UIntType, __a, __c, __m>::multiplier; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> + _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type + linear_congruential_engine<_UIntType, __a, __c, __m>::increment; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> + _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type + linear_congruential_engine<_UIntType, __a, __c, __m>::modulus; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> + _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type + linear_congruential_engine<_UIntType, __a, __c, __m>::default_seed; + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +template<class _Sseq> +void +linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q, + integral_constant<unsigned, 1>) +{ + const unsigned __k = 1; + uint32_t __ar[__k+3]; + __q.generate(__ar, __ar + __k + 3); + result_type __s = static_cast<result_type>(__ar[3] % __m); + __x_ = __c == 0 && __s == 0 ? result_type(1) : __s; +} + +template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +template<class _Sseq> +void +linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q, + integral_constant<unsigned, 2>) +{ + const unsigned __k = 2; + uint32_t __ar[__k+3]; + __q.generate(__ar, __ar + __k + 3); + result_type __s = static_cast<result_type>((__ar[3] + + ((uint64_t)__ar[4] << 32)) % __m); + __x_ = __c == 0 && __s == 0 ? 
result_type(1) : __s; +} + +template <class _CharT, class _Traits, + class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +inline _LIBCPP_INLINE_VISIBILITY +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const linear_congruential_engine<_UIntType, __a, __c, __m>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _Ostream; + __os.flags(_Ostream::dec | _Ostream::left); + __os.fill(__os.widen(' ')); + return __os << __x.__x_; +} + +template <class _CharT, class _Traits, + class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + linear_congruential_engine<_UIntType, __a, __c, __m>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + _UIntType __t; + __is >> __t; + if (!__is.fail()) + __x.__x_ = __t; + return __is; +} + +typedef linear_congruential_engine<uint_fast32_t, 16807, 0, 2147483647> + minstd_rand0; +typedef linear_congruential_engine<uint_fast32_t, 48271, 0, 2147483647> + minstd_rand; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H diff --git a/libcxx/include/__random/log2.h b/libcxx/include/__random/log2.h new file mode 100644 index 000000000000..3d9640c1f787 --- /dev/null +++ b/libcxx/include/__random/log2.h @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_LOG2_H +#define _LIBCPP___RANDOM_LOG2_H + +#include <__config> +#include <cstddef> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _UIntType, _UIntType _Xp, size_t _Rp> +struct __log2_imp; + +template <unsigned long long _Xp, size_t _Rp> +struct __log2_imp<unsigned long long, _Xp, _Rp> +{ + static const size_t value = _Xp & ((unsigned long long)(1) << _Rp) ? _Rp + : __log2_imp<unsigned long long, _Xp, _Rp - 1>::value; +}; + +template <unsigned long long _Xp> +struct __log2_imp<unsigned long long, _Xp, 0> +{ + static const size_t value = 0; +}; + +template <size_t _Rp> +struct __log2_imp<unsigned long long, 0, _Rp> +{ + static const size_t value = _Rp + 1; +}; + +#ifndef _LIBCPP_HAS_NO_INT128 + +template <__uint128_t _Xp, size_t _Rp> +struct __log2_imp<__uint128_t, _Xp, _Rp> +{ + static const size_t value = (_Xp >> 64) + ? 
(64 + __log2_imp<unsigned long long, (_Xp >> 64), 63>::value) + : __log2_imp<unsigned long long, _Xp, 63>::value; +}; + +#endif // _LIBCPP_HAS_NO_INT128 + +template <class _UIntType, _UIntType _Xp> +struct __log2 +{ + static const size_t value = __log2_imp< +#ifndef _LIBCPP_HAS_NO_INT128 + typename conditional< + sizeof(_UIntType) <= sizeof(unsigned long long), + unsigned long long, + __uint128_t + >::type, +#else + unsigned long long, +#endif // _LIBCPP_HAS_NO_INT128 + _Xp, sizeof(_UIntType) * __CHAR_BIT__ - 1>::value; +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANDOM_LOG2_H diff --git a/libcxx/include/__random/lognormal_distribution.h b/libcxx/include/__random/lognormal_distribution.h new file mode 100644 index 000000000000..752861c3de0c --- /dev/null +++ b/libcxx/include/__random/lognormal_distribution.h @@ -0,0 +1,163 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H + +#include <__config> +#include <__random/normal_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS lognormal_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + normal_distribution<result_type> __nd_; + public: + typedef lognormal_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __m = 0, result_type __s = 1) + : __nd_(__m, __s) {} + + _LIBCPP_INLINE_VISIBILITY + result_type m() const {return __nd_.mean();} + _LIBCPP_INLINE_VISIBILITY + result_type s() const {return __nd_.stddev();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__nd_ == __y.__nd_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + friend class lognormal_distribution; + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const lognormal_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + lognormal_distribution<_RT>& __x); + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + lognormal_distribution() : lognormal_distribution(0) {} + _LIBCPP_INLINE_VISIBILITY + explicit lognormal_distribution(result_type __m, result_type __s = 1) + : __p_(param_type(__m, __s)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit lognormal_distribution(result_type __m = 0, + result_type __s = 1) + : __p_(param_type(__m, __s)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit lognormal_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {__p_.__nd_.reset();} + + // generating functions + 
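+    // The generating functions draw a normal variate with parameters (m, s)
+    // from the stored normal_distribution and return exp() of it, which is
+    // what makes the result lognormally distributed. The const_cast in the
+    // param_type overload is needed because param_type holds the
+    // normal_distribution by value and normal_distribution::operator() is
+    // non-const.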
template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g, const param_type& __p) + {return _VSTD::exp(const_cast<normal_distribution<result_type>&>(__p.__nd_)(__g));} + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type m() const {return __p_.m();} + _LIBCPP_INLINE_VISIBILITY + result_type s() const {return __p_.s();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const lognormal_distribution& __x, + const lognormal_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const lognormal_distribution& __x, + const lognormal_distribution& __y) + {return !(__x == __y);} + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const lognormal_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + lognormal_distribution<_RT>& __x); +}; + +template <class _CharT, class _Traits, class _RT> +inline _LIBCPP_INLINE_VISIBILITY +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const lognormal_distribution<_RT>& __x) +{ + return __os << __x.__p_.__nd_; +} + +template <class _CharT, class _Traits, class _RT> +inline _LIBCPP_INLINE_VISIBILITY +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + lognormal_distribution<_RT>& __x) +{ + return __is >> __x.__p_.__nd_; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/mersenne_twister_engine.h b/libcxx/include/__random/mersenne_twister_engine.h new file mode 100644 index 000000000000..121ffae37ec0 --- /dev/null +++ b/libcxx/include/__random/mersenne_twister_engine.h @@ -0,0 +1,534 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H +#define _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H + +#include <__algorithm/equal.h> +#include <__algorithm/min.h> +#include <__config> +#include <__random/is_seed_sequence.h> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine; + +template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +bool +operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y); + +template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +_LIBCPP_INLINE_VISIBILITY +bool +operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y); + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x); + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x); + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine +{ +public: + // types + typedef _UIntType result_type; + +private: + result_type __x_[__n]; + size_t __i_; + + static_assert( 0 < __m, "mersenne_twister_engine invalid parameters"); + static_assert(__m <= __n, "mersenne_twister_engine invalid parameters"); + static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; + static_assert(__w <= _Dt, "mersenne_twister_engine invalid parameters"); + static_assert( 2 <= __w, "mersenne_twister_engine invalid parameters"); + static_assert(__r <= __w, "mersenne_twister_engine invalid parameters"); + static_assert(__u <= __w, "mersenne_twister_engine invalid parameters"); + static_assert(__s <= __w, "mersenne_twister_engine invalid 
parameters"); + static_assert(__t <= __w, "mersenne_twister_engine invalid parameters"); + static_assert(__l <= __w, "mersenne_twister_engine invalid parameters"); +public: + static _LIBCPP_CONSTEXPR const result_type _Min = 0; + static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) : + (result_type(1) << __w) - result_type(1); + static_assert(_Min < _Max, "mersenne_twister_engine invalid parameters"); + static_assert(__a <= _Max, "mersenne_twister_engine invalid parameters"); + static_assert(__b <= _Max, "mersenne_twister_engine invalid parameters"); + static_assert(__c <= _Max, "mersenne_twister_engine invalid parameters"); + static_assert(__d <= _Max, "mersenne_twister_engine invalid parameters"); + static_assert(__f <= _Max, "mersenne_twister_engine invalid parameters"); + + // engine characteristics + static _LIBCPP_CONSTEXPR const size_t word_size = __w; + static _LIBCPP_CONSTEXPR const size_t state_size = __n; + static _LIBCPP_CONSTEXPR const size_t shift_size = __m; + static _LIBCPP_CONSTEXPR const size_t mask_bits = __r; + static _LIBCPP_CONSTEXPR const result_type xor_mask = __a; + static _LIBCPP_CONSTEXPR const size_t tempering_u = __u; + static _LIBCPP_CONSTEXPR const result_type tempering_d = __d; + static _LIBCPP_CONSTEXPR const size_t tempering_s = __s; + static _LIBCPP_CONSTEXPR const result_type tempering_b = __b; + static _LIBCPP_CONSTEXPR const size_t tempering_t = __t; + static _LIBCPP_CONSTEXPR const result_type tempering_c = __c; + static _LIBCPP_CONSTEXPR const size_t tempering_l = __l; + static _LIBCPP_CONSTEXPR const result_type initialization_multiplier = __f; + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + static _LIBCPP_CONSTEXPR const result_type default_seed = 5489u; + + // constructors and seeding functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + mersenne_twister_engine() : mersenne_twister_engine(default_seed) {} + _LIBCPP_INLINE_VISIBILITY + explicit mersenne_twister_engine(result_type __sd) { seed(__sd); } +#else + _LIBCPP_INLINE_VISIBILITY + explicit mersenne_twister_engine(result_type __sd = default_seed) { + seed(__sd); + } +#endif + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit mersenne_twister_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, mersenne_twister_engine>::value>::type* = 0) + {seed(__q);} + void seed(result_type __sd = default_seed); + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, mersenne_twister_engine>::value, + void + >::type + seed(_Sseq& __q) + {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());} + + // generating functions + result_type operator()(); + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) operator()();} + + template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> + friend + bool + operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y); + + template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> + 
friend + bool + operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y); + + template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x); + + template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x); +private: + + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 1>); + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 2>); + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + __count < __w, + result_type + >::type + __lshift(result_type __x) {return (__x << __count) & _Max;} + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + (__count >= __w), + result_type + >::type + __lshift(result_type) {return result_type(0);} + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + __count < _Dt, + result_type + >::type + __rshift(result_type __x) {return __x >> __count;} + + template <size_t __count> + _LIBCPP_INLINE_VISIBILITY + static + typename enable_if + < + (__count >= _Dt), + result_type + >::type + __rshift(result_type) {return result_type(0);} +}; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::word_size; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::state_size; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::shift_size; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::mask_bits; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, 
size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::xor_mask; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_u; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_d; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_s; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_b; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_t; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_c; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const size_t + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_l; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + 
mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::initialization_multiplier; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> + _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type + mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::default_seed; + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +void +mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, + __t, __c, __l, __f>::seed(result_type __sd) + _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK +{ // __w >= 2 + __x_[0] = __sd & _Max; + for (size_t __i = 1; __i < __n; ++__i) + __x_[__i] = (__f * (__x_[__i-1] ^ __rshift<__w - 2>(__x_[__i-1])) + __i) & _Max; + __i_ = 0; +} + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +template<class _Sseq> +void +mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, + __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 1>) +{ + const unsigned __k = 1; + uint32_t __ar[__n * __k]; + __q.generate(__ar, __ar + __n * __k); + for (size_t __i = 0; __i < __n; ++__i) + __x_[__i] = static_cast<result_type>(__ar[__i] & _Max); + const result_type __mask = __r == _Dt ? result_type(~0) : + (result_type(1) << __r) - result_type(1); + __i_ = 0; + if ((__x_[0] & ~__mask) == 0) + { + for (size_t __i = 1; __i < __n; ++__i) + if (__x_[__i] != 0) + return; + __x_[0] = result_type(1) << (__w - 1); + } +} + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +template<class _Sseq> +void +mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, + __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 2>) +{ + const unsigned __k = 2; + uint32_t __ar[__n * __k]; + __q.generate(__ar, __ar + __n * __k); + for (size_t __i = 0; __i < __n; ++__i) + __x_[__i] = static_cast<result_type>( + (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); + const result_type __mask = __r == _Dt ? result_type(~0) : + (result_type(1) << __r) - result_type(1); + __i_ = 0; + if ((__x_[0] & ~__mask) == 0) + { + for (size_t __i = 1; __i < __n; ++__i) + if (__x_[__i] != 0) + return; + __x_[0] = result_type(1) << (__w - 1); + } +} + +template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, + _UIntType __a, size_t __u, _UIntType __d, size_t __s, + _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> +_UIntType +mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, + __t, __c, __l, __f>::operator()() +{ + const size_t __j = (__i_ + 1) % __n; + const result_type __mask = __r == _Dt ? 
result_type(~0) : + (result_type(1) << __r) - result_type(1); + const result_type _Yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); + const size_t __k = (__i_ + __m) % __n; + __x_[__i_] = __x_[__k] ^ __rshift<1>(_Yp) ^ (__a * (_Yp & 1)); + result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d); + __i_ = __j; + __z ^= __lshift<__s>(__z) & __b; + __z ^= __lshift<__t>(__z) & __c; + return __z ^ __rshift<__l>(__z); +} + +template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +bool +operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y) +{ + if (__x.__i_ == __y.__i_) + return _VSTD::equal(__x.__x_, __x.__x_ + _Np, __y.__x_); + if (__x.__i_ == 0 || __y.__i_ == 0) + { + size_t __j = _VSTD::min(_Np - __x.__i_, _Np - __y.__i_); + if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j, + __y.__x_ + __y.__i_)) + return false; + if (__x.__i_ == 0) + return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Np, __y.__x_); + return _VSTD::equal(__x.__x_, __x.__x_ + (_Np - __j), __y.__x_ + __j); + } + if (__x.__i_ < __y.__i_) + { + size_t __j = _Np - __y.__i_; + if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j), + __y.__x_ + __y.__i_)) + return false; + if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Np, + __y.__x_)) + return false; + return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_, + __y.__x_ + (_Np - (__x.__i_ + __j))); + } + size_t __j = _Np - __x.__i_; + if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j), + __x.__x_ + __x.__i_)) + return false; + if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Np, + __x.__x_)) + return false; + return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_, + __x.__x_ + (_Np - (__y.__i_ + __j))); +} + +template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __y) +{ + return !(__x == __y); +} + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _Ostream; + __os.flags(_Ostream::dec | _Ostream::left); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.__x_[__x.__i_]; + for (size_t __j = __x.__i_ + 1; __j < _Np; ++__j) + __os << __sp << __x.__x_[__j]; + for (size_t __j = 0; __j < __x.__i_; ++__j) + __os << __sp << __x.__x_[__j]; + return __os; +} + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, + _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, + _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> +basic_istream<_CharT, _Traits>& 
+operator>>(basic_istream<_CharT, _Traits>& __is, + mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, + _Bp, _Tp, _Cp, _Lp, _Fp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + _UInt __t[_Np]; + for (size_t __i = 0; __i < _Np; ++__i) + __is >> __t[__i]; + if (!__is.fail()) + { + for (size_t __i = 0; __i < _Np; ++__i) + __x.__x_[__i] = __t[__i]; + __x.__i_ = 0; + } + return __is; +} + +typedef mersenne_twister_engine<uint_fast32_t, 32, 624, 397, 31, + 0x9908b0df, 11, 0xffffffff, + 7, 0x9d2c5680, + 15, 0xefc60000, + 18, 1812433253> mt19937; +typedef mersenne_twister_engine<uint_fast64_t, 64, 312, 156, 31, + 0xb5026f5aa96619e9ULL, 29, 0x5555555555555555ULL, + 17, 0x71d67fffeda60000ULL, + 37, 0xfff7eee000000000ULL, + 43, 6364136223846793005ULL> mt19937_64; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H diff --git a/libcxx/include/__random/negative_binomial_distribution.h b/libcxx/include/__random/negative_binomial_distribution.h new file mode 100644 index 000000000000..7329bac2ff85 --- /dev/null +++ b/libcxx/include/__random/negative_binomial_distribution.h @@ -0,0 +1,176 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H + +#include <__config> +#include <__random/bernoulli_distribution.h> +#include <__random/gamma_distribution.h> +#include <__random/poisson_distribution.h> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _IntType = int> +class _LIBCPP_TEMPLATE_VIS negative_binomial_distribution +{ +public: + // types + typedef _IntType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __k_; + double __p_; + public: + typedef negative_binomial_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __k = 1, double __p = 0.5) + : __k_(__k), __p_(__p) {} + + _LIBCPP_INLINE_VISIBILITY + result_type k() const {return __k_;} + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__k_ == __y.__k_ && __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + negative_binomial_distribution() : negative_binomial_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit negative_binomial_distribution(result_type __k, double __p = 0.5) + : __p_(__k, __p) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit negative_binomial_distribution(result_type __k = 1, + double __p = 0.5) + : __p_(__k, __p) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit negative_binomial_distribution(const param_type& __p) : 
__p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type k() const {return __p_.k();} + _LIBCPP_INLINE_VISIBILITY + double p() const {return __p_.p();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::max();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const negative_binomial_distribution& __x, + const negative_binomial_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const negative_binomial_distribution& __x, + const negative_binomial_distribution& __y) + {return !(__x == __y);} +}; + +template <class _IntType> +template<class _URNG> +_IntType +negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr) +{ + result_type __k = __pr.k(); + double __p = __pr.p(); + if (__k <= 21 * __p) + { + bernoulli_distribution __gen(__p); + result_type __f = 0; + result_type __s = 0; + while (__s < __k) + { + if (__gen(__urng)) + ++__s; + else + ++__f; + } + return __f; + } + return poisson_distribution<result_type>(gamma_distribution<double> + (__k, (1-__p)/__p)(__urng))(__urng); +} + +template <class _CharT, class _Traits, class _IntType> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const negative_binomial_distribution<_IntType>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + return __os << __x.k() << __sp << __x.p(); +} + +template <class _CharT, class _Traits, class _IntType> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + negative_binomial_distribution<_IntType>& __x) +{ + typedef negative_binomial_distribution<_IntType> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __k; + double __p; + __is >> __k >> __p; + if (!__is.fail()) + __x.param(param_type(__k, __p)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/normal_distribution.h b/libcxx/include/__random/normal_distribution.h new file mode 100644 index 000000000000..b460ffb7ea9d --- /dev/null +++ b/libcxx/include/__random/normal_distribution.h @@ -0,0 +1,208 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H + +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS normal_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __mean_; + result_type __stddev_; + public: + typedef normal_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __mean = 0, result_type __stddev = 1) + : __mean_(__mean), __stddev_(__stddev) {} + + _LIBCPP_INLINE_VISIBILITY + result_type mean() const {return __mean_;} + _LIBCPP_INLINE_VISIBILITY + result_type stddev() const {return __stddev_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + result_type _V_; + bool _V_hot_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + normal_distribution() : normal_distribution(0) {} + _LIBCPP_INLINE_VISIBILITY + explicit normal_distribution(result_type __mean, result_type __stddev = 1) + : __p_(param_type(__mean, __stddev)), _V_hot_(false) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit normal_distribution(result_type __mean = 0, + result_type __stddev = 1) + : __p_(param_type(__mean, __stddev)), _V_hot_(false) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit normal_distribution(const param_type& __p) + : __p_(__p), _V_hot_(false) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {_V_hot_ = false;} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type mean() const {return __p_.mean();} + _LIBCPP_INLINE_VISIBILITY + result_type stddev() const {return __p_.stddev();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return -numeric_limits<result_type>::infinity();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const normal_distribution& __x, + const normal_distribution& __y) + {return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ && + (!__x._V_hot_ || __x._V_ == __y._V_);} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const normal_distribution& __x, + const normal_distribution& __y) + {return !(__x == __y);} + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const normal_distribution<_RT>& __x); + + template <class _CharT, class 
_Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + normal_distribution<_RT>& __x); +}; + +template <class _RealType> +template<class _URNG> +_RealType +normal_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + result_type _Up; + if (_V_hot_) + { + _V_hot_ = false; + _Up = _V_; + } + else + { + uniform_real_distribution<result_type> _Uni(-1, 1); + result_type __u; + result_type __v; + result_type __s; + do + { + __u = _Uni(__g); + __v = _Uni(__g); + __s = __u * __u + __v * __v; + } while (__s > 1 || __s == 0); + result_type _Fp = _VSTD::sqrt(-2 * _VSTD::log(__s) / __s); + _V_ = __v * _Fp; + _V_hot_ = true; + _Up = __u * _Fp; + } + return _Up * __p.stddev() + __p.mean(); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const normal_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_; + if (__x._V_hot_) + __os << __sp << __x._V_; + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + normal_distribution<_RT>& __x) +{ + typedef normal_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __mean; + result_type __stddev; + result_type _Vp = 0; + bool _V_hot = false; + __is >> __mean >> __stddev >> _V_hot; + if (_V_hot) + __is >> _Vp; + if (!__is.fail()) + { + __x.param(param_type(__mean, __stddev)); + __x._V_hot_ = _V_hot; + __x._V_ = _Vp; + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/piecewise_constant_distribution.h b/libcxx/include/__random/piecewise_constant_distribution.h new file mode 100644 index 000000000000..ece20d1a1d6e --- /dev/null +++ b/libcxx/include/__random/piecewise_constant_distribution.h @@ -0,0 +1,356 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
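// Illustration (not part of the upstream diff): the normal_distribution above
// draws deviates in pairs with the Marsaglia polar method and caches the second
// one in _V_, so consecutive calls alternate between a fresh pair and the cache.
#include <iostream>
#include <random>

int main() {
    std::mt19937_64 gen(2021);
    std::normal_distribution<double> d(0.0, 1.0);   // mean 0, standard deviation 1
    for (int i = 0; i < 4; ++i)
        std::cout << d(gen) << ' ';
    std::cout << '\n';
}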
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H +#define _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H + +#include <__algorithm/upper_bound.h> +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <iosfwd> +#include <numeric> +#include <vector> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS piecewise_constant_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + vector<result_type> __b_; + vector<result_type> __densities_; + vector<result_type> __areas_; + public: + typedef piecewise_constant_distribution distribution_type; + + param_type(); + template<class _InputIteratorB, class _InputIteratorW> + param_type(_InputIteratorB __fB, _InputIteratorB __lB, + _InputIteratorW __fW); +#ifndef _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); +#endif // _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + param_type(size_t __nw, result_type __xmin, result_type __xmax, + _UnaryOperation __fw); + param_type(param_type const&) = default; + param_type & operator=(const param_type& __rhs); + + _LIBCPP_INLINE_VISIBILITY + vector<result_type> intervals() const {return __b_;} + _LIBCPP_INLINE_VISIBILITY + vector<result_type> densities() const {return __densities_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + + private: + void __init(); + + friend class piecewise_constant_distribution; + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_constant_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_constant_distribution<_RT>& __x); + }; + +private: + param_type __p_; + +public: + // constructor and reset functions + _LIBCPP_INLINE_VISIBILITY + piecewise_constant_distribution() {} + template<class _InputIteratorB, class _InputIteratorW> + _LIBCPP_INLINE_VISIBILITY + piecewise_constant_distribution(_InputIteratorB __fB, + _InputIteratorB __lB, + _InputIteratorW __fW) + : __p_(__fB, __lB, __fW) {} + +#ifndef _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + _LIBCPP_INLINE_VISIBILITY + piecewise_constant_distribution(initializer_list<result_type> __bl, + _UnaryOperation __fw) + : __p_(__bl, __fw) {} +#endif // _LIBCPP_CXX03_LANG + + template<class _UnaryOperation> + _LIBCPP_INLINE_VISIBILITY + piecewise_constant_distribution(size_t __nw, result_type __xmin, + result_type __xmax, _UnaryOperation __fw) + : __p_(__nw, __xmin, __xmax, __fw) {} + + _LIBCPP_INLINE_VISIBILITY + explicit piecewise_constant_distribution(const param_type& __p) + : __p_(__p) {} + + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type 
operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + vector<result_type> intervals() const {return __p_.intervals();} + _LIBCPP_INLINE_VISIBILITY + vector<result_type> densities() const {return __p_.densities();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return __p_.__b_.front();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return __p_.__b_.back();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const piecewise_constant_distribution& __x, + const piecewise_constant_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const piecewise_constant_distribution& __x, + const piecewise_constant_distribution& __y) + {return !(__x == __y);} + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_constant_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_constant_distribution<_RT>& __x); +}; + +template<class _RealType> +typename piecewise_constant_distribution<_RealType>::param_type & +piecewise_constant_distribution<_RealType>::param_type::operator= + (const param_type& __rhs) +{ +// These can throw + __b_.reserve (__rhs.__b_.size ()); + __densities_.reserve(__rhs.__densities_.size()); + __areas_.reserve (__rhs.__areas_.size()); + +// These can not throw + __b_ = __rhs.__b_; + __densities_ = __rhs.__densities_; + __areas_ = __rhs.__areas_; + return *this; +} + +template<class _RealType> +void +piecewise_constant_distribution<_RealType>::param_type::__init() +{ + // __densities_ contains non-normalized areas + result_type __total_area = _VSTD::accumulate(__densities_.begin(), + __densities_.end(), + result_type()); + for (size_t __i = 0; __i < __densities_.size(); ++__i) + __densities_[__i] /= __total_area; + // __densities_ contains normalized areas + __areas_.assign(__densities_.size(), result_type()); + _VSTD::partial_sum(__densities_.begin(), __densities_.end() - 1, + __areas_.begin() + 1); + // __areas_ contains partial sums of normalized areas: [0, __densities_ - 1] + __densities_.back() = 1 - __areas_.back(); // correct round off error + for (size_t __i = 0; __i < __densities_.size(); ++__i) + __densities_[__i] /= (__b_[__i+1] - __b_[__i]); + // __densities_ now contains __densities_ +} + +template<class _RealType> +piecewise_constant_distribution<_RealType>::param_type::param_type() + : __b_(2), + __densities_(1, 1.0), + __areas_(1, 0.0) +{ + __b_[1] = 1; +} + +template<class _RealType> +template<class _InputIteratorB, class _InputIteratorW> +piecewise_constant_distribution<_RealType>::param_type::param_type( + _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) + : __b_(__fB, __lB) +{ + if (__b_.size() < 2) + { + __b_.resize(2); + __b_[0] = 0; + __b_[1] = 1; + __densities_.assign(1, 1.0); + __areas_.assign(1, 0.0); + } + else + { + __densities_.reserve(__b_.size() - 1); + for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__fW) + __densities_.push_back(*__fW); + __init(); + } +} + +#ifndef _LIBCPP_CXX03_LANG + +template<class _RealType> +template<class 
_UnaryOperation> +piecewise_constant_distribution<_RealType>::param_type::param_type( + initializer_list<result_type> __bl, _UnaryOperation __fw) + : __b_(__bl.begin(), __bl.end()) +{ + if (__b_.size() < 2) + { + __b_.resize(2); + __b_[0] = 0; + __b_[1] = 1; + __densities_.assign(1, 1.0); + __areas_.assign(1, 0.0); + } + else + { + __densities_.reserve(__b_.size() - 1); + for (size_t __i = 0; __i < __b_.size() - 1; ++__i) + __densities_.push_back(__fw((__b_[__i+1] + __b_[__i])*.5)); + __init(); + } +} + +#endif // _LIBCPP_CXX03_LANG + +template<class _RealType> +template<class _UnaryOperation> +piecewise_constant_distribution<_RealType>::param_type::param_type( + size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw) + : __b_(__nw == 0 ? 2 : __nw + 1) +{ + size_t __n = __b_.size() - 1; + result_type __d = (__xmax - __xmin) / __n; + __densities_.reserve(__n); + for (size_t __i = 0; __i < __n; ++__i) + { + __b_[__i] = __xmin + __i * __d; + __densities_.push_back(__fw(__b_[__i] + __d*.5)); + } + __b_[__n] = __xmax; + __init(); +} + +template<class _RealType> +template<class _URNG> +_RealType +piecewise_constant_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + typedef uniform_real_distribution<result_type> _Gen; + result_type __u = _Gen()(__g); + ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(), + __u) - __p.__areas_.begin() - 1; + return (__u - __p.__areas_[__k]) / __p.__densities_[__k] + __p.__b_[__k]; +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_constant_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + size_t __n = __x.__p_.__b_.size(); + __os << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__b_[__i]; + __n = __x.__p_.__densities_.size(); + __os << __sp << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__densities_[__i]; + __n = __x.__p_.__areas_.size(); + __os << __sp << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__areas_[__i]; + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_constant_distribution<_RT>& __x) +{ + typedef piecewise_constant_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + size_t __n; + __is >> __n; + vector<result_type> __b(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __b[__i]; + __is >> __n; + vector<result_type> __densities(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __densities[__i]; + __is >> __n; + vector<result_type> __areas(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __areas[__i]; + if (!__is.fail()) + { + swap(__x.__p_.__b_, __b); + swap(__x.__p_.__densities_, __densities); + swap(__x.__p_.__areas_, __areas); + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H diff --git a/libcxx/include/__random/piecewise_linear_distribution.h b/libcxx/include/__random/piecewise_linear_distribution.h 
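// Illustration (not part of the upstream diff): piecewise_constant_distribution
// as added above, constructed from interval boundaries plus one weight per
// interval; __init() normalizes the weights into densities and partial-sum areas.
#include <iostream>
#include <random>
#include <vector>

int main() {
    std::vector<double> breaks  = {0.0, 1.0, 3.0, 4.0};   // intervals [0,1), [1,3), [3,4)
    std::vector<double> weights = {1.0, 4.0, 1.0};         // relative weight of each interval
    std::mt19937 gen(7);
    std::piecewise_constant_distribution<double> d(breaks.begin(), breaks.end(),
                                                   weights.begin());
    for (int i = 0; i < 5; ++i)
        std::cout << d(gen) << ' ';
    std::cout << '\n';
}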
new file mode 100644 index 000000000000..b2ba164d0707 --- /dev/null +++ b/libcxx/include/__random/piecewise_linear_distribution.h @@ -0,0 +1,372 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H +#define _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H + +#include <__algorithm/upper_bound.h> +#include <__config> +#include <__random/uniform_real_distribution.h> +#include <iosfwd> +#include <numeric> +#include <vector> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS piecewise_linear_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + vector<result_type> __b_; + vector<result_type> __densities_; + vector<result_type> __areas_; + public: + typedef piecewise_linear_distribution distribution_type; + + param_type(); + template<class _InputIteratorB, class _InputIteratorW> + param_type(_InputIteratorB __fB, _InputIteratorB __lB, + _InputIteratorW __fW); +#ifndef _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); +#endif // _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + param_type(size_t __nw, result_type __xmin, result_type __xmax, + _UnaryOperation __fw); + param_type(param_type const&) = default; + param_type & operator=(const param_type& __rhs); + + _LIBCPP_INLINE_VISIBILITY + vector<result_type> intervals() const {return __b_;} + _LIBCPP_INLINE_VISIBILITY + vector<result_type> densities() const {return __densities_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + + private: + void __init(); + + friend class piecewise_linear_distribution; + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_linear_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_linear_distribution<_RT>& __x); + }; + +private: + param_type __p_; + +public: + // constructor and reset functions + _LIBCPP_INLINE_VISIBILITY + piecewise_linear_distribution() {} + template<class _InputIteratorB, class _InputIteratorW> + _LIBCPP_INLINE_VISIBILITY + piecewise_linear_distribution(_InputIteratorB __fB, + _InputIteratorB __lB, + _InputIteratorW __fW) + : __p_(__fB, __lB, __fW) {} + +#ifndef _LIBCPP_CXX03_LANG + template<class _UnaryOperation> + _LIBCPP_INLINE_VISIBILITY + piecewise_linear_distribution(initializer_list<result_type> __bl, + _UnaryOperation __fw) + : __p_(__bl, __fw) {} +#endif // _LIBCPP_CXX03_LANG + + template<class _UnaryOperation> + _LIBCPP_INLINE_VISIBILITY + piecewise_linear_distribution(size_t __nw, result_type 
__xmin, + result_type __xmax, _UnaryOperation __fw) + : __p_(__nw, __xmin, __xmax, __fw) {} + + _LIBCPP_INLINE_VISIBILITY + explicit piecewise_linear_distribution(const param_type& __p) + : __p_(__p) {} + + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + vector<result_type> intervals() const {return __p_.intervals();} + _LIBCPP_INLINE_VISIBILITY + vector<result_type> densities() const {return __p_.densities();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return __p_.__b_.front();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return __p_.__b_.back();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const piecewise_linear_distribution& __x, + const piecewise_linear_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const piecewise_linear_distribution& __x, + const piecewise_linear_distribution& __y) + {return !(__x == __y);} + + template <class _CharT, class _Traits, class _RT> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_linear_distribution<_RT>& __x); + + template <class _CharT, class _Traits, class _RT> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_linear_distribution<_RT>& __x); +}; + +template<class _RealType> +typename piecewise_linear_distribution<_RealType>::param_type & +piecewise_linear_distribution<_RealType>::param_type::operator= + (const param_type& __rhs) +{ +// These can throw + __b_.reserve (__rhs.__b_.size ()); + __densities_.reserve(__rhs.__densities_.size()); + __areas_.reserve (__rhs.__areas_.size()); + +// These can not throw + __b_ = __rhs.__b_; + __densities_ = __rhs.__densities_; + __areas_ = __rhs.__areas_; + return *this; +} + + +template<class _RealType> +void +piecewise_linear_distribution<_RealType>::param_type::__init() +{ + __areas_.assign(__densities_.size() - 1, result_type()); + result_type _Sp = 0; + for (size_t __i = 0; __i < __areas_.size(); ++__i) + { + __areas_[__i] = (__densities_[__i+1] + __densities_[__i]) * + (__b_[__i+1] - __b_[__i]) * .5; + _Sp += __areas_[__i]; + } + for (size_t __i = __areas_.size(); __i > 1;) + { + --__i; + __areas_[__i] = __areas_[__i-1] / _Sp; + } + __areas_[0] = 0; + for (size_t __i = 1; __i < __areas_.size(); ++__i) + __areas_[__i] += __areas_[__i-1]; + for (size_t __i = 0; __i < __densities_.size(); ++__i) + __densities_[__i] /= _Sp; +} + +template<class _RealType> +piecewise_linear_distribution<_RealType>::param_type::param_type() + : __b_(2), + __densities_(2, 1.0), + __areas_(1, 0.0) +{ + __b_[1] = 1; +} + +template<class _RealType> +template<class _InputIteratorB, class _InputIteratorW> +piecewise_linear_distribution<_RealType>::param_type::param_type( + _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) + : __b_(__fB, __lB) +{ + if (__b_.size() < 2) + { + __b_.resize(2); + __b_[0] = 0; + __b_[1] = 1; + __densities_.assign(2, 1.0); + __areas_.assign(1, 0.0); + } + else + { + __densities_.reserve(__b_.size()); + for (size_t __i = 0; __i < __b_.size(); ++__i, ++__fW) + 
__densities_.push_back(*__fW); + __init(); + } +} + +#ifndef _LIBCPP_CXX03_LANG + +template<class _RealType> +template<class _UnaryOperation> +piecewise_linear_distribution<_RealType>::param_type::param_type( + initializer_list<result_type> __bl, _UnaryOperation __fw) + : __b_(__bl.begin(), __bl.end()) +{ + if (__b_.size() < 2) + { + __b_.resize(2); + __b_[0] = 0; + __b_[1] = 1; + __densities_.assign(2, 1.0); + __areas_.assign(1, 0.0); + } + else + { + __densities_.reserve(__b_.size()); + for (size_t __i = 0; __i < __b_.size(); ++__i) + __densities_.push_back(__fw(__b_[__i])); + __init(); + } +} + +#endif // _LIBCPP_CXX03_LANG + +template<class _RealType> +template<class _UnaryOperation> +piecewise_linear_distribution<_RealType>::param_type::param_type( + size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw) + : __b_(__nw == 0 ? 2 : __nw + 1) +{ + size_t __n = __b_.size() - 1; + result_type __d = (__xmax - __xmin) / __n; + __densities_.reserve(__b_.size()); + for (size_t __i = 0; __i < __n; ++__i) + { + __b_[__i] = __xmin + __i * __d; + __densities_.push_back(__fw(__b_[__i])); + } + __b_[__n] = __xmax; + __densities_.push_back(__fw(__b_[__n])); + __init(); +} + +template<class _RealType> +template<class _URNG> +_RealType +piecewise_linear_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + typedef uniform_real_distribution<result_type> _Gen; + result_type __u = _Gen()(__g); + ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(), + __u) - __p.__areas_.begin() - 1; + __u -= __p.__areas_[__k]; + const result_type __dk = __p.__densities_[__k]; + const result_type __dk1 = __p.__densities_[__k+1]; + const result_type __deltad = __dk1 - __dk; + const result_type __bk = __p.__b_[__k]; + if (__deltad == 0) + return __u / __dk + __bk; + const result_type __bk1 = __p.__b_[__k+1]; + const result_type __deltab = __bk1 - __bk; + return (__bk * __dk1 - __bk1 * __dk + + _VSTD::sqrt(__deltab * (__deltab * __dk * __dk + 2 * __deltad * __u))) / + __deltad; +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const piecewise_linear_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + size_t __n = __x.__p_.__b_.size(); + __os << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__b_[__i]; + __n = __x.__p_.__densities_.size(); + __os << __sp << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__densities_[__i]; + __n = __x.__p_.__areas_.size(); + __os << __sp << __n; + for (size_t __i = 0; __i < __n; ++__i) + __os << __sp << __x.__p_.__areas_[__i]; + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + piecewise_linear_distribution<_RT>& __x) +{ + typedef piecewise_linear_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + size_t __n; + __is >> __n; + vector<result_type> __b(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __b[__i]; + __is >> __n; + vector<result_type> __densities(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is 
>> __densities[__i]; + __is >> __n; + vector<result_type> __areas(__n); + for (size_t __i = 0; __i < __n; ++__i) + __is >> __areas[__i]; + if (!__is.fail()) + { + swap(__x.__p_.__b_, __b); + swap(__x.__p_.__densities_, __densities); + swap(__x.__p_.__areas_, __areas); + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H diff --git a/libcxx/include/__random/poisson_distribution.h b/libcxx/include/__random/poisson_distribution.h new file mode 100644 index 000000000000..fb213b0103ad --- /dev/null +++ b/libcxx/include/__random/poisson_distribution.h @@ -0,0 +1,276 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H +#define _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H + +#include <__config> +#include <__random/exponential_distribution.h> +#include <__random/normal_distribution.h> +#include <__random/uniform_real_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _IntType = int> +class _LIBCPP_TEMPLATE_VIS poisson_distribution +{ +public: + // types + typedef _IntType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + double __mean_; + double __s_; + double __d_; + double __l_; + double __omega_; + double __c0_; + double __c1_; + double __c2_; + double __c3_; + double __c_; + + public: + typedef poisson_distribution distribution_type; + + explicit param_type(double __mean = 1.0); + + _LIBCPP_INLINE_VISIBILITY + double mean() const {return __mean_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__mean_ == __y.__mean_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + + friend class poisson_distribution; + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + poisson_distribution() : poisson_distribution(1.0) {} + _LIBCPP_INLINE_VISIBILITY + explicit poisson_distribution(double __mean) + : __p_(__mean) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit poisson_distribution(double __mean = 1.0) + : __p_(__mean) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit poisson_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + double mean() const {return __p_.mean();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::max();} + + friend 
_LIBCPP_INLINE_VISIBILITY + bool operator==(const poisson_distribution& __x, + const poisson_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const poisson_distribution& __x, + const poisson_distribution& __y) + {return !(__x == __y);} +}; + +template<class _IntType> +poisson_distribution<_IntType>::param_type::param_type(double __mean) + // According to the standard `inf` is a valid input, but it causes the + // distribution to hang, so we replace it with the maximum representable + // mean. + : __mean_(isinf(__mean) ? numeric_limits<double>::max() : __mean) +{ + if (__mean_ < 10) + { + __s_ = 0; + __d_ = 0; + __l_ = _VSTD::exp(-__mean_); + __omega_ = 0; + __c3_ = 0; + __c2_ = 0; + __c1_ = 0; + __c0_ = 0; + __c_ = 0; + } + else + { + __s_ = _VSTD::sqrt(__mean_); + __d_ = 6 * __mean_ * __mean_; + __l_ = _VSTD::trunc(__mean_ - 1.1484); + __omega_ = .3989423 / __s_; + double __b1_ = .4166667E-1 / __mean_; + double __b2_ = .3 * __b1_ * __b1_; + __c3_ = .1428571 * __b1_ * __b2_; + __c2_ = __b2_ - 15. * __c3_; + __c1_ = __b1_ - 6. * __b2_ + 45. * __c3_; + __c0_ = 1. - __b1_ + 3. * __b2_ - 15. * __c3_; + __c_ = .1069 / __mean_; + } +} + +template <class _IntType> +template<class _URNG> +_IntType +poisson_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr) +{ + double __tx; + uniform_real_distribution<double> __urd; + if (__pr.__mean_ < 10) + { + __tx = 0; + for (double __p = __urd(__urng); __p > __pr.__l_; ++__tx) + __p *= __urd(__urng); + } + else + { + double __difmuk; + double __g = __pr.__mean_ + __pr.__s_ * normal_distribution<double>()(__urng); + double __u; + if (__g > 0) + { + __tx = _VSTD::trunc(__g); + if (__tx >= __pr.__l_) + return _VSTD::__clamp_to_integral<result_type>(__tx); + __difmuk = __pr.__mean_ - __tx; + __u = __urd(__urng); + if (__pr.__d_ * __u >= __difmuk * __difmuk * __difmuk) + return _VSTD::__clamp_to_integral<result_type>(__tx); + } + exponential_distribution<double> __edist; + for (bool __using_exp_dist = false; true; __using_exp_dist = true) + { + double __e; + if (__using_exp_dist || __g <= 0) + { + double __t; + do + { + __e = __edist(__urng); + __u = __urd(__urng); + __u += __u - 1; + __t = 1.8 + (__u < 0 ? 
-__e : __e); + } while (__t <= -.6744); + __tx = _VSTD::trunc(__pr.__mean_ + __pr.__s_ * __t); + __difmuk = __pr.__mean_ - __tx; + __using_exp_dist = true; + } + double __px; + double __py; + if (__tx < 10 && __tx >= 0) + { + const double __fac[] = {1, 1, 2, 6, 24, 120, 720, 5040, + 40320, 362880}; + __px = -__pr.__mean_; + __py = _VSTD::pow(__pr.__mean_, (double)__tx) / __fac[static_cast<int>(__tx)]; + } + else + { + double __del = .8333333E-1 / __tx; + __del -= 4.8 * __del * __del * __del; + double __v = __difmuk / __tx; + if (_VSTD::abs(__v) > 0.25) + __px = __tx * _VSTD::log(1 + __v) - __difmuk - __del; + else + __px = __tx * __v * __v * (((((((.1250060 * __v + -.1384794) * + __v + .1421878) * __v + -.1661269) * __v + .2000118) * + __v + -.2500068) * __v + .3333333) * __v + -.5) - __del; + __py = .3989423 / _VSTD::sqrt(__tx); + } + double __r = (0.5 - __difmuk) / __pr.__s_; + double __r2 = __r * __r; + double __fx = -0.5 * __r2; + double __fy = __pr.__omega_ * (((__pr.__c3_ * __r2 + __pr.__c2_) * + __r2 + __pr.__c1_) * __r2 + __pr.__c0_); + if (__using_exp_dist) + { + if (__pr.__c_ * _VSTD::abs(__u) <= __py * _VSTD::exp(__px + __e) - + __fy * _VSTD::exp(__fx + __e)) + break; + } + else + { + if (__fy - __u * __fy <= __py * _VSTD::exp(__px - __fx)) + break; + } + } + } + return _VSTD::__clamp_to_integral<result_type>(__tx); +} + +template <class _CharT, class _Traits, class _IntType> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const poisson_distribution<_IntType>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + return __os << __x.mean(); +} + +template <class _CharT, class _Traits, class _IntType> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + poisson_distribution<_IntType>& __x) +{ + typedef poisson_distribution<_IntType> _Eng; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + double __mean; + __is >> __mean; + if (!__is.fail()) + __x.param(param_type(__mean)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H diff --git a/libcxx/include/__random/random_device.h b/libcxx/include/__random/random_device.h new file mode 100644 index 000000000000..f62f7a3d269b --- /dev/null +++ b/libcxx/include/__random/random_device.h @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
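// Illustration (not part of the upstream diff): the poisson_distribution above
// uses the product-of-uniforms loop for means below 10 and a normal-approximation
// acceptance scheme otherwise; either way the public interface is just this.
#include <iostream>
#include <random>

int main() {
    std::mt19937 gen(123);
    std::poisson_distribution<int> d(4.5);   // expected number of events per interval
    for (int i = 0; i < 5; ++i)
        std::cout << d(gen) << ' ';
    std::cout << '\n';
}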
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_RANDOM_DEVICE_H +#define _LIBCPP___RANDOM_RANDOM_DEVICE_H + +#include <__config> +#include <string> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE) + +class _LIBCPP_TYPE_VIS random_device +{ +#ifdef _LIBCPP_USING_DEV_RANDOM + int __f_; +#endif // defined(_LIBCPP_USING_DEV_RANDOM) +public: + // types + typedef unsigned result_type; + + // generator characteristics + static _LIBCPP_CONSTEXPR const result_type _Min = 0; + static _LIBCPP_CONSTEXPR const result_type _Max = 0xFFFFFFFFu; + + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Min;} + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Max;} + + // constructors +#ifndef _LIBCPP_CXX03_LANG + random_device() : random_device("/dev/urandom") {} + explicit random_device(const string& __token); +#else + explicit random_device(const string& __token = "/dev/urandom"); +#endif + ~random_device(); + + // generating functions + result_type operator()(); + + // property functions + double entropy() const _NOEXCEPT; + +private: + // no copy functions + random_device(const random_device&); // = delete; + random_device& operator=(const random_device&); // = delete; +}; + +#endif // !_LIBCPP_HAS_NO_RANDOM_DEVICE + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_RANDOM_DEVICE_H diff --git a/libcxx/include/__random/ranlux.h b/libcxx/include/__random/ranlux.h new file mode 100644 index 000000000000..0b415928df4d --- /dev/null +++ b/libcxx/include/__random/ranlux.h @@ -0,0 +1,31 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_RANLUX_H +#define _LIBCPP___RANDOM_RANLUX_H + +#include <__config> +#include <__random/discard_block_engine.h> +#include <__random/subtract_with_carry_engine.h> +#include <cstdint> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +typedef subtract_with_carry_engine<uint_fast32_t, 24, 10, 24> ranlux24_base; +typedef subtract_with_carry_engine<uint_fast64_t, 48, 5, 12> ranlux48_base; + +typedef discard_block_engine<ranlux24_base, 223, 23> ranlux24; +typedef discard_block_engine<ranlux48_base, 389, 11> ranlux48; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANDOM_RANLUX_H diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h new file mode 100644 index 000000000000..97bc88d0d4d1 --- /dev/null +++ b/libcxx/include/__random/seed_seq.h @@ -0,0 +1,150 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
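// Illustration (not part of the upstream diff): random_device and the ranlux
// engines added above, used together; per the constructor above, the default
// token here is "/dev/urandom".
#include <iostream>
#include <random>

int main() {
    std::random_device rd;                       // nondeterministic seed source
    std::ranlux24 gen(rd());                     // discard_block_engine over ranlux24_base
    std::uniform_int_distribution<int> d(1, 6);  // fair die
    std::cout << d(gen) << '\n';
}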
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_SEED_SEQ_H +#define _LIBCPP___RANDOM_SEED_SEQ_H + +#include <__algorithm/copy.h> +#include <__algorithm/fill.h> +#include <__algorithm/max.h> +#include <__config> +#include <initializer_list> +#include <vector> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +class _LIBCPP_TEMPLATE_VIS seed_seq +{ +public: + // types + typedef uint32_t result_type; + +private: + vector<result_type> __v_; + + template<class _InputIterator> + void init(_InputIterator __first, _InputIterator __last); +public: + // constructors + _LIBCPP_INLINE_VISIBILITY + seed_seq() _NOEXCEPT {} +#ifndef _LIBCPP_CXX03_LANG + template<class _Tp> + _LIBCPP_INLINE_VISIBILITY + seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());} +#endif // _LIBCPP_CXX03_LANG + + template<class _InputIterator> + _LIBCPP_INLINE_VISIBILITY + seed_seq(_InputIterator __first, _InputIterator __last) + {init(__first, __last);} + + // generating functions + template<class _RandomAccessIterator> + void generate(_RandomAccessIterator __first, _RandomAccessIterator __last); + + // property functions + _LIBCPP_INLINE_VISIBILITY + size_t size() const _NOEXCEPT {return __v_.size();} + template<class _OutputIterator> + _LIBCPP_INLINE_VISIBILITY + void param(_OutputIterator __dest) const + {_VSTD::copy(__v_.begin(), __v_.end(), __dest);} + +private: + // no copy functions + seed_seq(const seed_seq&); // = delete; + void operator=(const seed_seq&); // = delete; + + _LIBCPP_INLINE_VISIBILITY + static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);} +}; + +template<class _InputIterator> +void +seed_seq::init(_InputIterator __first, _InputIterator __last) +{ + for (_InputIterator __s = __first; __s != __last; ++__s) + __v_.push_back(*__s & 0xFFFFFFFF); +} + +template<class _RandomAccessIterator> +void +seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last) +{ + if (__first != __last) + { + _VSTD::fill(__first, __last, 0x8b8b8b8b); + const size_t __n = static_cast<size_t>(__last - __first); + const size_t __s = __v_.size(); + const size_t __t = (__n >= 623) ? 11 + : (__n >= 68) ? 7 + : (__n >= 39) ? 5 + : (__n >= 7) ? 
3 + : (__n - 1) / 2; + const size_t __p = (__n - __t) / 2; + const size_t __q = __p + __t; + const size_t __m = _VSTD::max(__s + 1, __n); + // __k = 0; + { + result_type __r = 1664525 * _Tp(__first[0] ^ __first[__p] + ^ __first[__n - 1]); + __first[__p] += __r; + __r += __s; + __first[__q] += __r; + __first[0] = __r; + } + for (size_t __k = 1; __k <= __s; ++__k) + { + const size_t __kmodn = __k % __n; + const size_t __kpmodn = (__k + __p) % __n; + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] + ^ __first[(__k - 1) % __n]); + __first[__kpmodn] += __r; + __r += __kmodn + __v_[__k-1]; + __first[(__k + __q) % __n] += __r; + __first[__kmodn] = __r; + } + for (size_t __k = __s + 1; __k < __m; ++__k) + { + const size_t __kmodn = __k % __n; + const size_t __kpmodn = (__k + __p) % __n; + result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] + ^ __first[(__k - 1) % __n]); + __first[__kpmodn] += __r; + __r += __kmodn; + __first[(__k + __q) % __n] += __r; + __first[__kmodn] = __r; + } + for (size_t __k = __m; __k < __m + __n; ++__k) + { + const size_t __kmodn = __k % __n; + const size_t __kpmodn = (__k + __p) % __n; + result_type __r = 1566083941 * _Tp(__first[__kmodn] + + __first[__kpmodn] + + __first[(__k - 1) % __n]); + __first[__kpmodn] ^= __r; + __r -= __kmodn; + __first[(__k + __q) % __n] ^= __r; + __first[__kmodn] = __r; + } + } +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_SEED_SEQ_H diff --git a/libcxx/include/__random/shuffle_order_engine.h b/libcxx/include/__random/shuffle_order_engine.h new file mode 100644 index 000000000000..7a5735dd7933 --- /dev/null +++ b/libcxx/include/__random/shuffle_order_engine.h @@ -0,0 +1,283 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
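// Illustration (not part of the upstream diff): seed_seq as added above, used
// both as an engine seeder and via its raw generate() mixing step.
#include <cstdint>
#include <iostream>
#include <random>

int main() {
    std::seed_seq seq{1u, 2u, 3u, 4u};   // entropy inputs, mixed by generate()
    std::mt19937 gen(seq);               // seeds the engine's entire state from the sequence
    std::uint32_t words[4];
    seq.generate(words, words + 4);      // the mixing step can also be called directly
    for (std::uint32_t w : words)
        std::cout << w << ' ';
    std::cout << gen() << '\n';
}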
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H +#define _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H + +#include <__algorithm/equal.h> +#include <__config> +#include <__random/is_seed_sequence.h> +#include <__utility/move.h> +#include <cstdint> +#include <iosfwd> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template <uint64_t _Xp, uint64_t _Yp> +struct __ugcd +{ + static _LIBCPP_CONSTEXPR const uint64_t value = __ugcd<_Yp, _Xp % _Yp>::value; +}; + +template <uint64_t _Xp> +struct __ugcd<_Xp, 0> +{ + static _LIBCPP_CONSTEXPR const uint64_t value = _Xp; +}; + +template <uint64_t _Np, uint64_t _Dp> +class __uratio +{ + static_assert(_Dp != 0, "__uratio divide by 0"); + static _LIBCPP_CONSTEXPR const uint64_t __gcd = __ugcd<_Np, _Dp>::value; +public: + static _LIBCPP_CONSTEXPR const uint64_t num = _Np / __gcd; + static _LIBCPP_CONSTEXPR const uint64_t den = _Dp / __gcd; + + typedef __uratio<num, den> type; +}; + +template<class _Engine, size_t __k> +class _LIBCPP_TEMPLATE_VIS shuffle_order_engine +{ + static_assert(0 < __k, "shuffle_order_engine invalid parameters"); +public: + // types + typedef typename _Engine::result_type result_type; + +private: + _Engine __e_; + result_type _V_[__k]; + result_type _Y_; + +public: + // engine characteristics + static _LIBCPP_CONSTEXPR const size_t table_size = __k; + +#ifdef _LIBCPP_CXX03_LANG + static const result_type _Min = _Engine::_Min; + static const result_type _Max = _Engine::_Max; +#else + static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min(); + static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max(); +#endif + static_assert(_Min < _Max, "shuffle_order_engine invalid parameters"); + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + + static _LIBCPP_CONSTEXPR const unsigned long long _Rp = _Max - _Min + 1ull; + + // constructors and seeding functions + _LIBCPP_INLINE_VISIBILITY + shuffle_order_engine() {__init();} + _LIBCPP_INLINE_VISIBILITY + explicit shuffle_order_engine(const _Engine& __e) + : __e_(__e) {__init();} +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit shuffle_order_engine(_Engine&& __e) + : __e_(_VSTD::move(__e)) {__init();} +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + explicit shuffle_order_engine(result_type __sd) : __e_(__sd) {__init();} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit shuffle_order_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, shuffle_order_engine>::value && + !is_convertible<_Sseq, _Engine>::value>::type* = 0) + : __e_(__q) {__init();} + _LIBCPP_INLINE_VISIBILITY + void seed() {__e_.seed(); __init();} + _LIBCPP_INLINE_VISIBILITY + void seed(result_type __sd) {__e_.seed(__sd); __init();} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, shuffle_order_engine>::value, + void + >::type + seed(_Sseq& __q) {__e_.seed(__q); __init();} + + // generating functions + _LIBCPP_INLINE_VISIBILITY + result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());} + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) 
operator()();} + + // property functions + _LIBCPP_INLINE_VISIBILITY + const _Engine& base() const _NOEXCEPT {return __e_;} + +private: + template<class _Eng, size_t _Kp> + friend + bool + operator==( + const shuffle_order_engine<_Eng, _Kp>& __x, + const shuffle_order_engine<_Eng, _Kp>& __y); + + template<class _Eng, size_t _Kp> + friend + bool + operator!=( + const shuffle_order_engine<_Eng, _Kp>& __x, + const shuffle_order_engine<_Eng, _Kp>& __y); + + template <class _CharT, class _Traits, + class _Eng, size_t _Kp> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const shuffle_order_engine<_Eng, _Kp>& __x); + + template <class _CharT, class _Traits, + class _Eng, size_t _Kp> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + shuffle_order_engine<_Eng, _Kp>& __x); + + _LIBCPP_INLINE_VISIBILITY + void __init() + { + for (size_t __i = 0; __i < __k; ++__i) + _V_[__i] = __e_(); + _Y_ = __e_(); + } + + _LIBCPP_INLINE_VISIBILITY + result_type __eval(false_type) {return __eval2(integral_constant<bool, __k & 1>());} + _LIBCPP_INLINE_VISIBILITY + result_type __eval(true_type) {return __eval(__uratio<__k, _Rp>());} + + _LIBCPP_INLINE_VISIBILITY + result_type __eval2(false_type) {return __eval(__uratio<__k/2, 0x8000000000000000ull>());} + _LIBCPP_INLINE_VISIBILITY + result_type __eval2(true_type) {return __evalf<__k, 0>();} + + template <uint64_t _Np, uint64_t _Dp> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + (__uratio<_Np, _Dp>::num > 0xFFFFFFFFFFFFFFFFull / (_Max - _Min)), + result_type + >::type + __eval(__uratio<_Np, _Dp>) + {return __evalf<__uratio<_Np, _Dp>::num, __uratio<_Np, _Dp>::den>();} + + template <uint64_t _Np, uint64_t _Dp> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __uratio<_Np, _Dp>::num <= 0xFFFFFFFFFFFFFFFFull / (_Max - _Min), + result_type + >::type + __eval(__uratio<_Np, _Dp>) + { + const size_t __j = static_cast<size_t>(__uratio<_Np, _Dp>::num * (_Y_ - _Min) + / __uratio<_Np, _Dp>::den); + _Y_ = _V_[__j]; + _V_[__j] = __e_(); + return _Y_; + } + + template <uint64_t __n, uint64_t __d> + _LIBCPP_INLINE_VISIBILITY + result_type __evalf() + { + const double _Fp = __d == 0 ? + __n / (2. 
* 0x8000000000000000ull) : + __n / (double)__d; + const size_t __j = static_cast<size_t>(_Fp * (_Y_ - _Min)); + _Y_ = _V_[__j]; + _V_[__j] = __e_(); + return _Y_; + } +}; + +template<class _Engine, size_t __k> + _LIBCPP_CONSTEXPR const size_t shuffle_order_engine<_Engine, __k>::table_size; + +template<class _Eng, size_t _Kp> +bool +operator==( + const shuffle_order_engine<_Eng, _Kp>& __x, + const shuffle_order_engine<_Eng, _Kp>& __y) +{ + return __x._Y_ == __y._Y_ && _VSTD::equal(__x._V_, __x._V_ + _Kp, __y._V_) && + __x.__e_ == __y.__e_; +} + +template<class _Eng, size_t _Kp> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=( + const shuffle_order_engine<_Eng, _Kp>& __x, + const shuffle_order_engine<_Eng, _Kp>& __y) +{ + return !(__x == __y); +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Kp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const shuffle_order_engine<_Eng, _Kp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _Ostream; + __os.flags(_Ostream::dec | _Ostream::left); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.__e_ << __sp << __x._V_[0]; + for (size_t __i = 1; __i < _Kp; ++__i) + __os << __sp << __x._V_[__i]; + return __os << __sp << __x._Y_; +} + +template <class _CharT, class _Traits, + class _Eng, size_t _Kp> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + shuffle_order_engine<_Eng, _Kp>& __x) +{ + typedef typename shuffle_order_engine<_Eng, _Kp>::result_type result_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + _Eng __e; + result_type _Vp[_Kp+1]; + __is >> __e; + for (size_t __i = 0; __i < _Kp+1; ++__i) + __is >> _Vp[__i]; + if (!__is.fail()) + { + __x.__e_ = __e; + for (size_t __i = 0; __i < _Kp; ++__i) + __x._V_[__i] = _Vp[__i]; + __x._Y_ = _Vp[_Kp]; + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H diff --git a/libcxx/include/__random/student_t_distribution.h b/libcxx/include/__random/student_t_distribution.h new file mode 100644 index 000000000000..0cf911e4cd76 --- /dev/null +++ b/libcxx/include/__random/student_t_distribution.h @@ -0,0 +1,153 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
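// Illustration (not part of the upstream diff): shuffle_order_engine keeps a
// table of __k pending outputs and returns them in a shuffled order; the
// standard knuth_b typedef is shuffle_order_engine<minstd_rand0, 256>.
#include <iostream>
#include <random>

int main() {
    std::knuth_b gen(99);                                        // standard 256-entry table over minstd_rand0
    std::shuffle_order_engine<std::minstd_rand, 64> custom(123); // any base engine and table size work
    std::cout << gen() << ' ' << custom() << '\n';
}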
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H +#define _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H + +#include <__config> +#include <__random/gamma_distribution.h> +#include <__random/normal_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS student_t_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __n_; + public: + typedef student_t_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __n = 1) : __n_(__n) {} + + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __n_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__n_ == __y.__n_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + normal_distribution<result_type> __nd_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + student_t_distribution() : student_t_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit student_t_distribution(result_type __n) + : __p_(param_type(__n)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit student_t_distribution(result_type __n = 1) + : __p_(param_type(__n)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit student_t_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {__nd_.reset();} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type n() const {return __p_.n();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return -numeric_limits<result_type>::infinity();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const student_t_distribution& __x, + const student_t_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const student_t_distribution& __x, + const student_t_distribution& __y) + {return !(__x == __y);} +}; + +template <class _RealType> +template<class _URNG> +_RealType +student_t_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + gamma_distribution<result_type> __gd(__p.n() * .5, 2); + return __nd_(__g) * _VSTD::sqrt(__p.n()/__gd(__g)); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const student_t_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + __os << 
__x.n(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + student_t_distribution<_RT>& __x) +{ + typedef student_t_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __n; + __is >> __n; + if (!__is.fail()) + __x.param(param_type(__n)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H diff --git a/libcxx/include/__random/subtract_with_carry_engine.h b/libcxx/include/__random/subtract_with_carry_engine.h new file mode 100644 index 000000000000..073f84dccff6 --- /dev/null +++ b/libcxx/include/__random/subtract_with_carry_engine.h @@ -0,0 +1,352 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H +#define _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H + +#include <__algorithm/equal.h> +#include <__algorithm/min.h> +#include <__config> +#include <__random/is_seed_sequence.h> +#include <__random/linear_congruential_engine.h> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine; + +template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +bool +operator==( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); + +template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +_LIBCPP_INLINE_VISIBILITY +bool +operator!=( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine +{ +public: + // types + typedef _UIntType result_type; + +private: + result_type __x_[__r]; + result_type __c_; + size_t __i_; + + static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; + static_assert( 0 < __w, "subtract_with_carry_engine invalid parameters"); + static_assert(__w <= _Dt, "subtract_with_carry_engine invalid parameters"); + static_assert( 0 < __s, "subtract_with_carry_engine invalid parameters"); + static_assert(__s < __r, "subtract_with_carry_engine 
invalid parameters"); +public: + static _LIBCPP_CONSTEXPR const result_type _Min = 0; + static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) : + (result_type(1) << __w) - result_type(1); + static_assert(_Min < _Max, "subtract_with_carry_engine invalid parameters"); + + // engine characteristics + static _LIBCPP_CONSTEXPR const size_t word_size = __w; + static _LIBCPP_CONSTEXPR const size_t short_lag = __s; + static _LIBCPP_CONSTEXPR const size_t long_lag = __r; + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + _LIBCPP_INLINE_VISIBILITY + static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + static _LIBCPP_CONSTEXPR const result_type default_seed = 19780503u; + + // constructors and seeding functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + subtract_with_carry_engine() : subtract_with_carry_engine(default_seed) {} + _LIBCPP_INLINE_VISIBILITY + explicit subtract_with_carry_engine(result_type __sd) { seed(__sd); } +#else + _LIBCPP_INLINE_VISIBILITY + explicit subtract_with_carry_engine(result_type __sd = default_seed) { + seed(__sd); + } +#endif + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + explicit subtract_with_carry_engine(_Sseq& __q, + typename enable_if<__is_seed_sequence<_Sseq, subtract_with_carry_engine>::value>::type* = 0) + {seed(__q);} + _LIBCPP_INLINE_VISIBILITY + void seed(result_type __sd = default_seed) + {seed(__sd, integral_constant<unsigned, 1 + (__w - 1) / 32>());} + template<class _Sseq> + _LIBCPP_INLINE_VISIBILITY + typename enable_if + < + __is_seed_sequence<_Sseq, subtract_with_carry_engine>::value, + void + >::type + seed(_Sseq& __q) + {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());} + + // generating functions + result_type operator()(); + _LIBCPP_INLINE_VISIBILITY + void discard(unsigned long long __z) {for (; __z; --__z) operator()();} + + template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> + friend + bool + operator==( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); + + template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> + friend + bool + operator!=( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); + + template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> + friend + basic_ostream<_CharT, _Traits>& + operator<<(basic_ostream<_CharT, _Traits>& __os, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); + + template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> + friend + basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, + subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); + +private: + + void seed(result_type __sd, integral_constant<unsigned, 1>); + void seed(result_type __sd, integral_constant<unsigned, 2>); + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 1>); + template<class _Sseq> + void __seed(_Sseq& __q, integral_constant<unsigned, 2>); +}; + +template<class _UIntType, size_t __w, size_t __s, size_t __r> + _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::word_size; + +template<class _UIntType, size_t __w, size_t __s, size_t __r> + _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::short_lag; + +template<class _UIntType, size_t __w, size_t __s, size_t __r> + 
_LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::long_lag; + +template<class _UIntType, size_t __w, size_t __s, size_t __r> + _LIBCPP_CONSTEXPR const typename subtract_with_carry_engine<_UIntType, __w, __s, __r>::result_type + subtract_with_carry_engine<_UIntType, __w, __s, __r>::default_seed; + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +void +subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd, + integral_constant<unsigned, 1>) +{ + linear_congruential_engine<result_type, 40014u, 0u, 2147483563u> + __e(__sd == 0u ? default_seed : __sd); + for (size_t __i = 0; __i < __r; ++__i) + __x_[__i] = static_cast<result_type>(__e() & _Max); + __c_ = __x_[__r-1] == 0; + __i_ = 0; +} + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +void +subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd, + integral_constant<unsigned, 2>) +{ + linear_congruential_engine<result_type, 40014u, 0u, 2147483563u> + __e(__sd == 0u ? default_seed : __sd); + for (size_t __i = 0; __i < __r; ++__i) + { + result_type __e0 = __e(); + __x_[__i] = static_cast<result_type>( + (__e0 + ((uint64_t)__e() << 32)) & _Max); + } + __c_ = __x_[__r-1] == 0; + __i_ = 0; +} + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +template<class _Sseq> +void +subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q, + integral_constant<unsigned, 1>) +{ + const unsigned __k = 1; + uint32_t __ar[__r * __k]; + __q.generate(__ar, __ar + __r * __k); + for (size_t __i = 0; __i < __r; ++__i) + __x_[__i] = static_cast<result_type>(__ar[__i] & _Max); + __c_ = __x_[__r-1] == 0; + __i_ = 0; +} + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +template<class _Sseq> +void +subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q, + integral_constant<unsigned, 2>) +{ + const unsigned __k = 2; + uint32_t __ar[__r * __k]; + __q.generate(__ar, __ar + __r * __k); + for (size_t __i = 0; __i < __r; ++__i) + __x_[__i] = static_cast<result_type>( + (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); + __c_ = __x_[__r-1] == 0; + __i_ = 0; +} + +template<class _UIntType, size_t __w, size_t __s, size_t __r> +_UIntType +subtract_with_carry_engine<_UIntType, __w, __s, __r>::operator()() +{ + const result_type& __xs = __x_[(__i_ + (__r - __s)) % __r]; + result_type& __xr = __x_[__i_]; + result_type __new_c = __c_ == 0 ? __xs < __xr : __xs != 0 ? 
__xs <= __xr : 1; + __xr = (__xs - __xr - __c_) & _Max; + __c_ = __new_c; + __i_ = (__i_ + 1) % __r; + return __xr; +} + +template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +bool +operator==( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y) +{ + if (__x.__c_ != __y.__c_) + return false; + if (__x.__i_ == __y.__i_) + return _VSTD::equal(__x.__x_, __x.__x_ + _Rp, __y.__x_); + if (__x.__i_ == 0 || __y.__i_ == 0) + { + size_t __j = _VSTD::min(_Rp - __x.__i_, _Rp - __y.__i_); + if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j, + __y.__x_ + __y.__i_)) + return false; + if (__x.__i_ == 0) + return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Rp, __y.__x_); + return _VSTD::equal(__x.__x_, __x.__x_ + (_Rp - __j), __y.__x_ + __j); + } + if (__x.__i_ < __y.__i_) + { + size_t __j = _Rp - __y.__i_; + if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j), + __y.__x_ + __y.__i_)) + return false; + if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Rp, + __y.__x_)) + return false; + return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_, + __y.__x_ + (_Rp - (__x.__i_ + __j))); + } + size_t __j = _Rp - __x.__i_; + if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j), + __x.__x_ + __x.__i_)) + return false; + if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Rp, + __x.__x_)) + return false; + return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_, + __x.__x_ + (_Rp - (__y.__i_ + __j))); +} + +template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +inline _LIBCPP_INLINE_VISIBILITY +bool +operator!=( + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y) +{ + return !(__x == __y); +} + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _Ostream; + __os.flags(_Ostream::dec | _Ostream::left); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + __os << __x.__x_[__x.__i_]; + for (size_t __j = __x.__i_ + 1; __j < _Rp; ++__j) + __os << __sp << __x.__x_[__j]; + for (size_t __j = 0; __j < __x.__i_; ++__j) + __os << __sp << __x.__x_[__j]; + __os << __sp << __x.__c_; + return __os; +} + +template <class _CharT, class _Traits, + class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + _UInt __t[_Rp+1]; + for (size_t __i = 0; __i < _Rp+1; ++__i) + __is >> __t[__i]; + if (!__is.fail()) + { + for (size_t __i = 0; __i < _Rp; ++__i) + __x.__x_[__i] = __t[__i]; + __x.__c_ = __t[_Rp]; + __x.__i_ = 0; + } + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H diff --git a/libcxx/include/__random/uniform_int_distribution.h b/libcxx/include/__random/uniform_int_distribution.h index a7cfa1ec7305..55b4761637f0 100644 --- a/libcxx/include/__random/uniform_int_distribution.h +++ b/libcxx/include/__random/uniform_int_distribution.h @@ -11,6 +11,8 @@ #include <__bits> #include <__config> +#include <__random/log2.h> 
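For reference, a minimal usage sketch of the subtract_with_carry_engine interface added in the new header above; it is illustrative only and not part of the imported patch. The template arguments are the well-known ranlux24_base parameters, and the seed is the default_seed value declared in the header.

#include <cstdint>
#include <iostream>
#include <random>

int main() {
    // 24-bit lagged Fibonacci-with-carry engine (ranlux24_base parameters:
    // word size 24, short lag 10, long lag 24), seeded with default_seed.
    std::subtract_with_carry_engine<std::uint_fast32_t, 24, 10, 24> eng(19780503u);

    for (int i = 0; i < 5; ++i)
        std::cout << eng() << ' ';
    std::cout << '\n';

    // A second engine with the same seed, advanced the same number of steps
    // via discard(), compares equal to the first (operator== above).
    decltype(eng) other(19780503u);
    other.discard(5);
    std::cout << (eng == other) << '\n';   // prints 1
}
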
+#include <bit> #include <cstddef> #include <cstdint> #include <iosfwd> @@ -26,34 +28,6 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// __independent_bits_engine - -template <unsigned long long _Xp, size_t _Rp> -struct __log2_imp -{ - static const size_t value = _Xp & ((unsigned long long)(1) << _Rp) ? _Rp - : __log2_imp<_Xp, _Rp - 1>::value; -}; - -template <unsigned long long _Xp> -struct __log2_imp<_Xp, 0> -{ - static const size_t value = 0; -}; - -template <size_t _Rp> -struct __log2_imp<0, _Rp> -{ - static const size_t value = _Rp + 1; -}; - -template <class _UIntType, _UIntType _Xp> -struct __log2 -{ - static const size_t value = __log2_imp<_Xp, - sizeof(_UIntType) * __CHAR_BIT__ - 1>::value; -}; - template<class _Engine, class _UIntType> class __independent_bits_engine { @@ -181,7 +155,7 @@ __independent_bits_engine<_Engine, _UIntType>::__eval(true_type) return _Sp; } -template<class _IntType = int> +template<class _IntType = int> // __int128_t is also supported as an extension here class uniform_int_distribution { public: @@ -256,8 +230,8 @@ typename uniform_int_distribution<_IntType>::result_type uniform_int_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p) _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { - typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t), - uint32_t, uint64_t>::type _UIntType; + typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t), uint32_t, + typename make_unsigned<result_type>::type>::type _UIntType; const _UIntType _Rp = _UIntType(__p.b()) - _UIntType(__p.a()) + _UIntType(1); if (_Rp == 1) return __p.a(); @@ -265,7 +239,7 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK typedef __independent_bits_engine<_URNG, _UIntType> _Eng; if (_Rp == 0) return static_cast<result_type>(_Eng(__g, _Dt)()); - size_t __w = _Dt - __libcpp_clz(_Rp) - 1; + size_t __w = _Dt - __countl_zero(_Rp) - 1; if ((_Rp & (numeric_limits<_UIntType>::max() >> (_Dt - __w))) != 0) ++__w; _Eng __e(__g, __w); diff --git a/libcxx/include/__random/uniform_random_bit_generator.h b/libcxx/include/__random/uniform_random_bit_generator.h new file mode 100644 index 000000000000..7b2f0df868d7 --- /dev/null +++ b/libcxx/include/__random/uniform_random_bit_generator.h @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
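The uniform_int_distribution change above widens the internal unsigned type via make_unsigned<result_type> instead of capping it at uint64_t, which is what keeps result types wider than 64 bits (the __int128_t extension noted in the comment) working, and it switches the bit-width computation to __countl_zero. A minimal sketch exercising a full 64-bit range; the engine and seed are arbitrary choices for illustration, not part of the patch.

#include <cstdint>
#include <iostream>
#include <limits>
#include <random>

int main() {
    std::mt19937_64 gen(42);

    // Distribution over the full 64-bit range; the widened internal
    // arithmetic is what also makes the __int128_t extension workable.
    std::uniform_int_distribution<std::uint64_t> dist(
        0, std::numeric_limits<std::uint64_t>::max());

    for (int i = 0; i < 3; ++i)
        std::cout << dist(gen) << '\n';
}
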
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H +#define _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H + +#include <__concepts/arithmetic.h> +#include <__concepts/invocable.h> +#include <__concepts/same_as.h> +#include <__config> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +// [rand.req.urng] +template<class _Gen> +concept uniform_random_bit_generator = + invocable<_Gen&> && unsigned_integral<invoke_result_t<_Gen&>> && + requires { + { _Gen::min() } -> same_as<invoke_result_t<_Gen&>>; + { _Gen::max() } -> same_as<invoke_result_t<_Gen&>>; + requires bool_constant<(_Gen::min() < _Gen::max())>::value; + }; + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H diff --git a/libcxx/include/__random/uniform_real_distribution.h b/libcxx/include/__random/uniform_real_distribution.h new file mode 100644 index 000000000000..967e4e26fd0c --- /dev/null +++ b/libcxx/include/__random/uniform_real_distribution.h @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H + +#include <__config> +#include <__random/generate_canonical.h> +#include <iosfwd> +#include <limits> +#include <type_traits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS uniform_real_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __a_; + result_type __b_; + public: + typedef uniform_real_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __a = 0, + result_type __b = 1) + : __a_(__a), __b_(__b) {} + + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __a_;} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __b_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructors and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + uniform_real_distribution() : uniform_real_distribution(0) {} + explicit uniform_real_distribution(result_type __a, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit uniform_real_distribution(result_type __a = 0, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#endif + _LIBCPP_INLINE_VISIBILITY 
+ explicit uniform_real_distribution(const param_type& __p) : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __p_.a();} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __p_.b();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return a();} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return b();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const uniform_real_distribution& __x, + const uniform_real_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const uniform_real_distribution& __x, + const uniform_real_distribution& __y) + {return !(__x == __y);} +}; + +template<class _RealType> +template<class _URNG> +inline +typename uniform_real_distribution<_RealType>::result_type +uniform_real_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) +{ + return (__p.b() - __p.a()) + * _VSTD::generate_canonical<_RealType, numeric_limits<_RealType>::digits>(__g) + + __p.a(); +} + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const uniform_real_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + __os.fill(__sp); + return __os << __x.a() << __sp << __x.b(); +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + uniform_real_distribution<_RT>& __x) +{ + typedef uniform_real_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __a; + result_type __b; + __is >> __a >> __b; + if (!__is.fail()) + __x.param(param_type(__a, __b)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H diff --git a/libcxx/include/__random/weibull_distribution.h b/libcxx/include/__random/weibull_distribution.h new file mode 100644 index 000000000000..4c5e4e8fff1c --- /dev/null +++ b/libcxx/include/__random/weibull_distribution.h @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
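A minimal usage sketch for the uniform_real_distribution header added above, which produces values in [a, b) by scaling generate_canonical; the engine, seed, and sample count below are arbitrary illustration values, not part of the patch.

#include <iostream>
#include <random>

int main() {
    std::mt19937 gen(123);
    std::uniform_real_distribution<double> dist(0.0, 1.0);   // values in [0, 1)

    double sum = 0.0;
    for (int i = 0; i < 1000; ++i)
        sum += dist(gen);
    std::cout << "sample mean: " << sum / 1000 << '\n';      // roughly 0.5
}
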
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H +#define _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H + +#include <__config> +#include <__random/exponential_distribution.h> +#include <cmath> +#include <iosfwd> +#include <limits> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<class _RealType = double> +class _LIBCPP_TEMPLATE_VIS weibull_distribution +{ +public: + // types + typedef _RealType result_type; + + class _LIBCPP_TEMPLATE_VIS param_type + { + result_type __a_; + result_type __b_; + public: + typedef weibull_distribution distribution_type; + + _LIBCPP_INLINE_VISIBILITY + explicit param_type(result_type __a = 1, result_type __b = 1) + : __a_(__a), __b_(__b) {} + + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __a_;} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __b_;} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const param_type& __x, const param_type& __y) + {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const param_type& __x, const param_type& __y) + {return !(__x == __y);} + }; + +private: + param_type __p_; + +public: + // constructor and reset functions +#ifndef _LIBCPP_CXX03_LANG + _LIBCPP_INLINE_VISIBILITY + weibull_distribution() : weibull_distribution(1) {} + _LIBCPP_INLINE_VISIBILITY + explicit weibull_distribution(result_type __a, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#else + _LIBCPP_INLINE_VISIBILITY + explicit weibull_distribution(result_type __a = 1, result_type __b = 1) + : __p_(param_type(__a, __b)) {} +#endif + _LIBCPP_INLINE_VISIBILITY + explicit weibull_distribution(const param_type& __p) + : __p_(__p) {} + _LIBCPP_INLINE_VISIBILITY + void reset() {} + + // generating functions + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g) + {return (*this)(__g, __p_);} + template<class _URNG> + _LIBCPP_INLINE_VISIBILITY + result_type operator()(_URNG& __g, const param_type& __p) + {return __p.b() * + _VSTD::pow(exponential_distribution<result_type>()(__g), 1/__p.a());} + + // property functions + _LIBCPP_INLINE_VISIBILITY + result_type a() const {return __p_.a();} + _LIBCPP_INLINE_VISIBILITY + result_type b() const {return __p_.b();} + + _LIBCPP_INLINE_VISIBILITY + param_type param() const {return __p_;} + _LIBCPP_INLINE_VISIBILITY + void param(const param_type& __p) {__p_ = __p;} + + _LIBCPP_INLINE_VISIBILITY + result_type min() const {return 0;} + _LIBCPP_INLINE_VISIBILITY + result_type max() const {return numeric_limits<result_type>::infinity();} + + friend _LIBCPP_INLINE_VISIBILITY + bool operator==(const weibull_distribution& __x, + const weibull_distribution& __y) + {return __x.__p_ == __y.__p_;} + friend _LIBCPP_INLINE_VISIBILITY + bool operator!=(const weibull_distribution& __x, + const weibull_distribution& __y) + {return !(__x == __y);} +}; + +template <class _CharT, class _Traits, class _RT> +basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, + const weibull_distribution<_RT>& __x) +{ + __save_flags<_CharT, _Traits> __lx(__os); + typedef basic_ostream<_CharT, _Traits> _OStream; + __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | + _OStream::scientific); + _CharT __sp = __os.widen(' '); + 
__os.fill(__sp); + __os << __x.a() << __sp << __x.b(); + return __os; +} + +template <class _CharT, class _Traits, class _RT> +basic_istream<_CharT, _Traits>& +operator>>(basic_istream<_CharT, _Traits>& __is, + weibull_distribution<_RT>& __x) +{ + typedef weibull_distribution<_RT> _Eng; + typedef typename _Eng::result_type result_type; + typedef typename _Eng::param_type param_type; + __save_flags<_CharT, _Traits> __lx(__is); + typedef basic_istream<_CharT, _Traits> _Istream; + __is.flags(_Istream::dec | _Istream::skipws); + result_type __a; + result_type __b; + __is >> __a >> __b; + if (!__is.fail()) + __x.param(param_type(__a, __b)); + return __is; +} + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h index dc1cece33b8d..6a8364006beb 100644 --- a/libcxx/include/__ranges/concepts.h +++ b/libcxx/include/__ranges/concepts.h @@ -16,8 +16,8 @@ #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> #include <__ranges/access.h> -#include <__ranges/enable_borrowed_range.h> #include <__ranges/data.h> +#include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> #include <__ranges/size.h> #include <concepts> diff --git a/libcxx/include/__utility/priority_tag.h b/libcxx/include/__utility/priority_tag.h new file mode 100644 index 000000000000..45d9e5ec4c8f --- /dev/null +++ b/libcxx/include/__utility/priority_tag.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___UTILITY_PRIORITY_TAG_H +#define _LIBCPP___UTILITY_PRIORITY_TAG_H + +#include <__config> +#include <cstddef> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template<size_t _Ip> struct __priority_tag : __priority_tag<_Ip - 1> {}; +template<> struct __priority_tag<0> {}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___UTILITY_PRIORITY_TAG_H diff --git a/libcxx/include/bit b/libcxx/include/bit index 634475b99879..0aab83e7a6eb 100644 --- a/libcxx/include/bit +++ b/libcxx/include/bit @@ -14,9 +14,13 @@ bit synopsis namespace std { - // [bit.cast], bit_cast - template<class To, class From> - constexpr To bit_cast(const From& from) noexcept; // C++20 + // [bit.cast], bit_cast + template<class To, class From> + constexpr To bit_cast(const From& from) noexcept; // C++20 + + // [bit.byteswap], byteswap + template<class T> + constexpr T byteswap(T value) noexcept; // C++23 // [bit.pow.two], integral powers of 2 template <class T> @@ -51,13 +55,14 @@ namespace std { little = see below, // C++20 big = see below, // C++20 native = see below // C++20 -}; + }; } // namespace std */ #include <__bit/bit_cast.h> +#include <__bit/byteswap.h> #include <__bits> // __libcpp_clz #include <__config> #include <__debug> diff --git a/libcxx/include/compare b/libcxx/include/compare index 8a2a82907062..5c4578da0b89 100644 --- a/libcxx/include/compare +++ b/libcxx/include/compare @@ -140,25 +140,10 @@ namespace std { #include <__compare/compare_three_way_result.h> #include <__compare/is_eq.h> #include <__compare/ordering.h> +#include 
<__compare/partial_order.h> +#include <__compare/strong_order.h> #include <__compare/three_way_comparable.h> +#include <__compare/weak_order.h> #include <__config> -#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER -#pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -#if _LIBCPP_STD_VER > 17 - -// [cmp.alg], comparison algorithms -// TODO: unimplemented -template<class _Tp> constexpr strong_ordering strong_order(const _Tp& __lhs, const _Tp& __rhs); -template<class _Tp> constexpr weak_ordering weak_order(const _Tp& __lhs, const _Tp& __rhs); -template<class _Tp> constexpr partial_ordering partial_order(const _Tp& __lhs, const _Tp& __rhs); - -#endif // _LIBCPP_STD_VER > 17 - -_LIBCPP_END_NAMESPACE_STD - #endif // _LIBCPP_COMPARE diff --git a/libcxx/include/deque b/libcxx/include/deque index 9ab6ea748d53..e45d780e274f 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -915,16 +915,16 @@ class __deque_base __deque_base(const __deque_base& __c); __deque_base& operator=(const __deque_base& __c); public: - typedef _Allocator allocator_type; - typedef allocator_traits<allocator_type> __alloc_traits; - typedef typename __alloc_traits::size_type size_type; + typedef _Allocator allocator_type; + typedef allocator_traits<allocator_type> __alloc_traits; + typedef typename __alloc_traits::size_type size_type; - typedef _Tp value_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef typename __alloc_traits::difference_type difference_type; - typedef typename __alloc_traits::pointer pointer; - typedef typename __alloc_traits::const_pointer const_pointer; + typedef _Tp value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename __alloc_traits::difference_type difference_type; + typedef typename __alloc_traits::pointer pointer; + typedef typename __alloc_traits::const_pointer const_pointer; static const difference_type __block_size; @@ -1259,20 +1259,20 @@ public: static_assert((is_same<typename allocator_type::value_type, value_type>::value), "Allocator::value_type must be same type as value_type"); - typedef __deque_base<value_type, allocator_type> __base; + typedef __deque_base<value_type, allocator_type> __base; - typedef typename __base::__alloc_traits __alloc_traits; - typedef typename __base::reference reference; - typedef typename __base::const_reference const_reference; - typedef typename __base::iterator iterator; - typedef typename __base::const_iterator const_iterator; - typedef typename __allocator_traits<allocator_type>::size_type size_type; - typedef typename __base::difference_type difference_type; + typedef typename __base::__alloc_traits __alloc_traits; + typedef typename __base::reference reference; + typedef typename __base::const_reference const_reference; + typedef typename __base::iterator iterator; + typedef typename __base::const_iterator const_iterator; + typedef typename __base::size_type size_type; + typedef typename __base::difference_type difference_type; - typedef typename __base::pointer pointer; - typedef typename __base::const_pointer const_pointer; - typedef _VSTD::reverse_iterator<iterator> reverse_iterator; - typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator; + typedef typename __base::pointer pointer; + typedef typename __base::const_pointer const_pointer; + typedef _VSTD::reverse_iterator<iterator> reverse_iterator; + typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator; using typename __base::__deque_range; using 
typename __base::__deque_block_range; @@ -1289,7 +1289,14 @@ public: explicit deque(size_type __n, const _Allocator& __a); #endif deque(size_type __n, const value_type& __v); - deque(size_type __n, const value_type& __v, const allocator_type& __a); + + template <class = __enable_if_t<__is_allocator<_Allocator>::value> > + deque(size_type __n, const value_type& __v, const allocator_type& __a) : __base(__a) + { + if (__n > 0) + __append(__n, __v); + } + template <class _InputIter> deque(_InputIter __f, _InputIter __l, typename enable_if<__is_cpp17_input_iterator<_InputIter>::value>::type* = 0); @@ -1609,14 +1616,6 @@ deque<_Tp, _Allocator>::deque(size_type __n, const value_type& __v) } template <class _Tp, class _Allocator> -deque<_Tp, _Allocator>::deque(size_type __n, const value_type& __v, const allocator_type& __a) - : __base(__a) -{ - if (__n > 0) - __append(__n, __v); -} - -template <class _Tp, class _Allocator> template <class _InputIter> deque<_Tp, _Allocator>::deque(_InputIter __f, _InputIter __l, typename enable_if<__is_cpp17_input_iterator<_InputIter>::value>::type*) diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index dcbdbbae6985..39e8ca2e814b 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -1033,7 +1033,7 @@ public: auto __p_root_name = __p.__root_name(); auto __p_root_name_size = __p_root_name.size(); if (__p.is_absolute() || - (!__p_root_name.empty() && __p_root_name != root_name())) { + (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) { __pn_ = __p.__pn_; return *this; } @@ -1492,22 +1492,22 @@ public: #endif // !_LIBCPP_HAS_NO_LOCALIZATION friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) == 0; + return __lhs.__compare(__rhs.__pn_) == 0; } friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) != 0; + return __lhs.__compare(__rhs.__pn_) != 0; } friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) < 0; + return __lhs.__compare(__rhs.__pn_) < 0; } friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) <= 0; + return __lhs.__compare(__rhs.__pn_) <= 0; } friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) > 0; + return __lhs.__compare(__rhs.__pn_) > 0; } friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.compare(__rhs) >= 0; + return __lhs.__compare(__rhs.__pn_) >= 0; } friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, @@ -3024,13 +3024,17 @@ _LIBCPP_END_NAMESPACE_FILESYSTEM #if !defined(_LIBCPP_HAS_NO_RANGES) template <> +_LIBCPP_AVAILABILITY_FILESYSTEM inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true; template <> +_LIBCPP_AVAILABILITY_FILESYSTEM inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true; template <> +_LIBCPP_AVAILABILITY_FILESYSTEM inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true; template <> +_LIBCPP_AVAILABILITY_FILESYSTEM inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true; #endif diff --git a/libcxx/include/format b/libcxx/include/format index 
e1d47c9f84dd..788b9c299abc 100644 --- a/libcxx/include/format +++ b/libcxx/include/format @@ -51,9 +51,6 @@ namespace std { using wformat_args = basic_format_args<wformat_context>; - template<class Out, class charT> - using format_args_t = basic_format_args<basic_format_context<Out, charT>>; - // [format.functions], formatting functions template<class... Args> string format(string_view fmt, const Args&... args); @@ -79,17 +76,15 @@ namespace std { Out format_to(Out out, const locale& loc, wstring_view fmt, const Args&... args); template<class Out> - Out vformat_to(Out out, string_view fmt, - format_args_t<type_identity_t<Out>, char> args); + Out vformat_to(Out out, string_view fmt, format_args args); template<class Out> - Out vformat_to(Out out, wstring_view fmt, - format_args_t<type_identity_t<Out>, wchar_t> args); + Out vformat_to(Out out, wstring_view fmt, wformat_args args); template<class Out> Out vformat_to(Out out, const locale& loc, string_view fmt, - format_args_t<type_identity_t<Out>, char> args); + format_args char> args); template<class Out> Out vformat_to(Out out, const locale& loc, wstring_view fmt, - format_args_t<type_identity_t<Out>, wchar_t> args); + wformat_args args); template<class Out> struct format_to_n_result { Out out; @@ -325,9 +320,6 @@ using format_args = basic_format_args<format_context>; using wformat_args = basic_format_args<wformat_context>; #endif -template <class _OutIt, class _CharT> -using format_args_t = basic_format_args<basic_format_context<_OutIt, _CharT>>; - template <class _Context, class... _Args> struct _LIBCPP_TEMPLATE_VIS __format_arg_store { // TODO FMT Use a built-in array. @@ -436,51 +428,55 @@ __vformat_to(_ParseCtx&& __parse_ctx, _Ctx&& __ctx) { } // namespace __format -template <class _OutIt, class _CharT> +template <class _OutIt, class _CharT, class _FormatOutIt> requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt - __vformat_to(_OutIt __out_it, basic_string_view<_CharT> __fmt, - format_args_t<type_identity_t<_OutIt>, _CharT> __args) { - return __format::__vformat_to( - basic_format_parse_context{__fmt, __args.__size()}, - _VSTD::__format_context_create(_VSTD::move(__out_it), __args)); + __vformat_to( + _OutIt __out_it, basic_string_view<_CharT> __fmt, + basic_format_args<basic_format_context<_FormatOutIt, _CharT>> __args) { + if constexpr (same_as<_OutIt, _FormatOutIt>) + return _VSTD::__format::__vformat_to( + basic_format_parse_context{__fmt, __args.__size()}, + _VSTD::__format_context_create(_VSTD::move(__out_it), __args)); + else { + basic_string<_CharT> __str; + _VSTD::__format::__vformat_to( + basic_format_parse_context{__fmt, __args.__size()}, + _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args)); + return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it)); + } } template <output_iterator<const char&> _OutIt> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -vformat_to(_OutIt __out_it, string_view __fmt, - format_args_t<type_identity_t<_OutIt>, char> __args) { +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +vformat_to(_OutIt __out_it, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -vformat_to(_OutIt __out_it, wstring_view __fmt, - format_args_t<type_identity_t<_OutIt>, wchar_t> __args) { +_LIBCPP_ALWAYS_INLINE 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #endif template <output_iterator<const char&> _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(_OutIt __out_it, string_view __fmt, const _Args&... __args) { - return _VSTD::vformat_to( - _VSTD::move(__out_it), __fmt, - _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...)); + return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt, + _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(_OutIt __out_it, wstring_view __fmt, const _Args&... __args) { - return _VSTD::vformat_to( - _VSTD::move(__out_it), __fmt, - _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>( - __args...)); + return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt, + _VSTD::make_wformat_args(__args...)); } #endif -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string vformat(string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -488,7 +484,7 @@ vformat(string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring vformat(wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -497,14 +493,14 @@ vformat(wstring_view __fmt, wformat_args __args) { #endif template <class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(string_view __fmt, const _Args&... __args) { return _VSTD::vformat(__fmt, _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring format(wstring_view __fmt, const _Args&... __args) { return _VSTD::vformat(__fmt, _VSTD::make_wformat_args(__args...)); } @@ -556,54 +552,59 @@ formatted_size(wstring_view __fmt, const _Args&... 
__args) { #ifndef _LIBCPP_HAS_NO_LOCALIZATION -template <class _OutIt, class _CharT> +template <class _OutIt, class _CharT, class _FormatOutIt> requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt - __vformat_to(_OutIt __out_it, locale __loc, basic_string_view<_CharT> __fmt, - format_args_t<type_identity_t<_OutIt>, _CharT> __args) { - return __format::__vformat_to( - basic_format_parse_context{__fmt, __args.__size()}, - _VSTD::__format_context_create(_VSTD::move(__out_it), __args, - _VSTD::move(__loc))); + __vformat_to( + _OutIt __out_it, locale __loc, basic_string_view<_CharT> __fmt, + basic_format_args<basic_format_context<_FormatOutIt, _CharT>> __args) { + if constexpr (same_as<_OutIt, _FormatOutIt>) + return _VSTD::__format::__vformat_to( + basic_format_parse_context{__fmt, __args.__size()}, + _VSTD::__format_context_create(_VSTD::move(__out_it), __args, + _VSTD::move(__loc))); + else { + basic_string<_CharT> __str; + _VSTD::__format::__vformat_to( + basic_format_parse_context{__fmt, __args.__size()}, + _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args, + _VSTD::move(__loc))); + return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it)); + } } template <output_iterator<const char&> _OutIt> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -vformat_to(_OutIt __out_it, locale __loc, string_view __fmt, - format_args_t<type_identity_t<_OutIt>, char> __args) { +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( + _OutIt __out_it, locale __loc, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt -vformat_to(_OutIt __out_it, locale __loc, wstring_view __fmt, - format_args_t<type_identity_t<_OutIt>, wchar_t> __args) { +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( + _OutIt __out_it, locale __loc, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); } #endif template <output_iterator<const char&> _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to( _OutIt __out_it, locale __loc, string_view __fmt, const _Args&... __args) { - return _VSTD::vformat_to( - _VSTD::move(__out_it), _VSTD::move(__loc), __fmt, - _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...)); + return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, + _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <output_iterator<const wchar_t&> _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to( _OutIt __out_it, locale __loc, wstring_view __fmt, const _Args&... 
__args) { - return _VSTD::vformat_to( - _VSTD::move(__out_it), _VSTD::move(__loc), __fmt, - _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>( - __args...)); + return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, + _VSTD::make_wformat_args(__args...)); } #endif -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string vformat(locale __loc, string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -612,7 +613,7 @@ vformat(locale __loc, string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring vformat(locale __loc, wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -622,7 +623,7 @@ vformat(locale __loc, wstring_view __fmt, wformat_args __args) { #endif template <class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(locale __loc, string_view __fmt, const _Args&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt, _VSTD::make_format_args(__args...)); @@ -630,7 +631,7 @@ format(locale __loc, string_view __fmt, const _Args&... __args) { #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring format(locale __loc, wstring_view __fmt, const _Args&... 
__args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt, _VSTD::make_wformat_args(__args...)); diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 9d19e741f061..34168e88746e 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -186,6 +186,7 @@ template <class T, class Allocator, class Predicate> #include <iterator> #include <limits> #include <memory> +#include <type_traits> #include <version> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -643,12 +644,12 @@ public: static_assert((is_same<typename allocator_type::value_type, value_type>::value), "Allocator::value_type must be same type as value_type"); - typedef value_type& reference; - typedef const value_type& const_reference; - typedef typename allocator_traits<allocator_type>::pointer pointer; - typedef typename allocator_traits<allocator_type>::const_pointer const_pointer; - typedef typename __allocator_traits<allocator_type>::size_type size_type; - typedef typename allocator_traits<allocator_type>::difference_type difference_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename allocator_traits<allocator_type>::pointer pointer; + typedef typename allocator_traits<allocator_type>::const_pointer const_pointer; + typedef typename allocator_traits<allocator_type>::size_type size_type; + typedef typename allocator_traits<allocator_type>::difference_type difference_type; typedef typename base::iterator iterator; typedef typename base::const_iterator const_iterator; @@ -669,7 +670,13 @@ public: explicit forward_list(size_type __n, const allocator_type& __a); #endif forward_list(size_type __n, const value_type& __v); - forward_list(size_type __n, const value_type& __v, const allocator_type& __a); + + template <class = __enable_if_t<__is_allocator<_Alloc>::value> > + forward_list(size_type __n, const value_type& __v, const allocator_type& __a) : base(__a) + { + insert_after(cbefore_begin(), __n, __v); + } + template <class _InputIterator> forward_list(_InputIterator __f, _InputIterator __l, typename enable_if< @@ -944,14 +951,6 @@ forward_list<_Tp, _Alloc>::forward_list(size_type __n, const value_type& __v) } template <class _Tp, class _Alloc> -forward_list<_Tp, _Alloc>::forward_list(size_type __n, const value_type& __v, - const allocator_type& __a) - : base(__a) -{ - insert_after(cbefore_begin(), __n, __v); -} - -template <class _Tp, class _Alloc> template <class _InputIterator> forward_list<_Tp, _Alloc>::forward_list(_InputIterator __f, _InputIterator __l, typename enable_if< diff --git a/libcxx/include/list b/libcxx/include/list index 6282983ad20a..c9c050a4f1f0 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -845,24 +845,24 @@ class _LIBCPP_TEMPLATE_VIS list typedef typename base::__link_pointer __link_pointer; public: - typedef _Tp value_type; - typedef _Alloc allocator_type; + typedef _Tp value_type; + typedef _Alloc allocator_type; static_assert((is_same<value_type, typename allocator_type::value_type>::value), "Invalid allocator::value_type"); - typedef value_type& reference; - typedef const value_type& const_reference; - typedef typename base::pointer pointer; - typedef typename base::const_pointer const_pointer; - typedef typename __allocator_traits<allocator_type>::size_type size_type; - typedef typename base::difference_type difference_type; - typedef typename base::iterator iterator; - typedef typename base::const_iterator const_iterator; - typedef _VSTD::reverse_iterator<iterator> reverse_iterator; - typedef 
_VSTD::reverse_iterator<const_iterator> const_reverse_iterator; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename base::pointer pointer; + typedef typename base::const_pointer const_pointer; + typedef typename base::size_type size_type; + typedef typename base::difference_type difference_type; + typedef typename base::iterator iterator; + typedef typename base::const_iterator const_iterator; + typedef _VSTD::reverse_iterator<iterator> reverse_iterator; + typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator; #if _LIBCPP_STD_VER > 17 - typedef size_type __remove_return_type; + typedef size_type __remove_return_type; #else - typedef void __remove_return_type; + typedef void __remove_return_type; #endif _LIBCPP_INLINE_VISIBILITY @@ -885,7 +885,16 @@ public: explicit list(size_type __n, const allocator_type& __a); #endif list(size_type __n, const value_type& __x); - list(size_type __n, const value_type& __x, const allocator_type& __a); + template <class = __enable_if_t<__is_allocator<_Alloc>::value> > + list(size_type __n, const value_type& __x, const allocator_type& __a) : base(__a) + { +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_c(this); +#endif + for (; __n > 0; --__n) + push_back(__x); + } + template <class _InpIter> list(_InpIter __f, _InpIter __l, typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0); @@ -1242,17 +1251,6 @@ list<_Tp, _Alloc>::list(size_type __n, const value_type& __x) } template <class _Tp, class _Alloc> -list<_Tp, _Alloc>::list(size_type __n, const value_type& __x, const allocator_type& __a) - : base(__a) -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__insert_c(this); -#endif - for (; __n > 0; --__n) - push_back(__x); -} - -template <class _Tp, class _Alloc> template <class _InpIter> list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l, typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*) diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index f34442ed5c9a..a4a264bd9147 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -338,6 +338,7 @@ module std [system] { module __bit { module bit_cast { private header "__bit/bit_cast.h" } + module byteswap { private header "__bit/byteswap.h" } } } module bitset { @@ -376,8 +377,11 @@ module std [system] { module compare_three_way_result { private header "__compare/compare_three_way_result.h" } module is_eq { private header "__compare/is_eq.h" } module ordering { private header "__compare/ordering.h" } + module partial_order { private header "__compare/partial_order.h" } + module strong_order { private header "__compare/strong_order.h" } module synth_three_way { private header "__compare/synth_three_way.h" } module three_way_comparable { private header "__compare/three_way_comparable.h" } + module weak_order { private header "__compare/weak_order.h" } } } module complex { @@ -658,6 +662,22 @@ module std [system] { module numeric { header "numeric" export * + + module __numeric { + module accumulate { private header "__numeric/accumulate.h" } + module adjacent_difference { private header "__numeric/adjacent_difference.h" } + module exclusive_scan { private header "__numeric/exclusive_scan.h" } + module gcd_lcm { private header "__numeric/gcd_lcm.h" } + module inclusive_scan { private header "__numeric/inclusive_scan.h" } + module inner_product { private header "__numeric/inner_product.h" } + module iota { private header "__numeric/iota.h" } + module midpoint { 
private header "__numeric/midpoint.h" } + module partial_sum { private header "__numeric/partial_sum.h" } + module reduce { private header "__numeric/reduce.h" } + module transform_exclusive_scan { private header "__numeric/transform_exclusive_scan.h" } + module transform_inclusive_scan { private header "__numeric/transform_inclusive_scan.h" } + module transform_reduce { private header "__numeric/transform_reduce.h" } + } } module optional { header "optional" @@ -679,7 +699,41 @@ module std [system] { export * module __random { - module uniform_int_distribution { private header "__random/uniform_int_distribution.h" } + module bernoulli_distribution { private header "__random/bernoulli_distribution.h" } + module binomial_distribution { private header "__random/binomial_distribution.h" } + module cauchy_distribution { private header "__random/cauchy_distribution.h" } + module chi_squared_distribution { private header "__random/chi_squared_distribution.h" } + module default_random_engine { private header "__random/default_random_engine.h" } + module discard_block_engine { private header "__random/discard_block_engine.h" } + module discrete_distribution { private header "__random/discrete_distribution.h" } + module exponential_distribution { private header "__random/exponential_distribution.h" } + module extreme_value_distribution { private header "__random/extreme_value_distribution.h" } + module fisher_f_distribution { private header "__random/fisher_f_distribution.h" } + module gamma_distribution { private header "__random/gamma_distribution.h" } + module generate_canonical { private header "__random/generate_canonical.h" } + module geometric_distribution { private header "__random/geometric_distribution.h" } + module independent_bits_engine { private header "__random/independent_bits_engine.h" } + module is_seed_sequence { private header "__random/is_seed_sequence.h" } + module knuth_b { private header "__random/knuth_b.h" } + module linear_congruential_engine { private header "__random/linear_congruential_engine.h" } + module log2 { private header "__random/log2.h" } + module lognormal_distribution { private header "__random/lognormal_distribution.h" } + module mersenne_twister_engine { private header "__random/mersenne_twister_engine.h" } + module negative_binomial_distribution { private header "__random/negative_binomial_distribution.h" } + module normal_distribution { private header "__random/normal_distribution.h" } + module piecewise_constant_distribution { private header "__random/piecewise_constant_distribution.h" } + module piecewise_linear_distribution { private header "__random/piecewise_linear_distribution.h" } + module poisson_distribution { private header "__random/poisson_distribution.h" } + module random_device { private header "__random/random_device.h" } + module ranlux { private header "__random/ranlux.h" } + module seed_seq { private header "__random/seed_seq.h" } + module shuffle_order_engine { private header "__random/shuffle_order_engine.h" } + module student_t_distribution { private header "__random/student_t_distribution.h" } + module subtract_with_carry_engine { private header "__random/subtract_with_carry_engine.h" } + module uniform_int_distribution { private header "__random/uniform_int_distribution.h" } + module uniform_random_bit_generator { private header "__random/uniform_random_bit_generator.h" } + module uniform_real_distribution { private header "__random/uniform_real_distribution.h" } + module weibull_distribution { private header 
"__random/weibull_distribution.h" } } } module ranges { @@ -848,6 +902,7 @@ module std [system] { module move { private header "__utility/move.h" } module pair { private header "__utility/pair.h" } module piecewise_construct { private header "__utility/piecewise_construct.h" } + module priority_tag { private header "__utility/priority_tag.h" } module rel_ops { private header "__utility/rel_ops.h" } module swap { private header "__utility/swap.h" } module to_underlying { private header "__utility/to_underlying.h" } diff --git a/libcxx/include/numeric b/libcxx/include/numeric index fc44efff761d..09d15a6024de 100644 --- a/libcxx/include/numeric +++ b/libcxx/include/numeric @@ -145,490 +145,29 @@ template<class T> */ #include <__config> -#include <__debug> #include <cmath> // for isnormal #include <functional> #include <iterator> -#include <limits> // for numeric_limits #include <version> +#include <__numeric/accumulate.h> +#include <__numeric/adjacent_difference.h> +#include <__numeric/exclusive_scan.h> +#include <__numeric/gcd_lcm.h> +#include <__numeric/inclusive_scan.h> +#include <__numeric/inner_product.h> +#include <__numeric/iota.h> +#include <__numeric/midpoint.h> +#include <__numeric/partial_sum.h> +#include <__numeric/reduce.h> +#include <__numeric/transform_exclusive_scan.h> +#include <__numeric/transform_inclusive_scan.h> +#include <__numeric/transform_reduce.h> + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - -_LIBCPP_BEGIN_NAMESPACE_STD - -template <class _InputIterator, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -accumulate(_InputIterator __first, _InputIterator __last, _Tp __init) -{ - for (; __first != __last; ++__first) -#if _LIBCPP_STD_VER > 17 - __init = _VSTD::move(__init) + *__first; -#else - __init = __init + *__first; -#endif - return __init; -} - -template <class _InputIterator, class _Tp, class _BinaryOperation> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -accumulate(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op) -{ - for (; __first != __last; ++__first) -#if _LIBCPP_STD_VER > 17 - __init = __binary_op(_VSTD::move(__init), *__first); -#else - __init = __binary_op(__init, *__first); -#endif - return __init; -} - -#if _LIBCPP_STD_VER > 14 -template <class _InputIterator, class _Tp, class _BinaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOp __b) -{ - for (; __first != __last; ++__first) - __init = __b(__init, *__first); - return __init; -} - -template <class _InputIterator, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -reduce(_InputIterator __first, _InputIterator __last, _Tp __init) -{ - return _VSTD::reduce(__first, __last, __init, _VSTD::plus<>()); -} - -template <class _InputIterator> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -typename iterator_traits<_InputIterator>::value_type -reduce(_InputIterator __first, _InputIterator __last) -{ - return _VSTD::reduce(__first, __last, - typename iterator_traits<_InputIterator>::value_type{}); -} -#endif - -template <class _InputIterator1, class _InputIterator2, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _Tp __init) -{ - for (; __first1 != __last1; ++__first1, (void) ++__first2) -#if 
_LIBCPP_STD_VER > 17 - __init = _VSTD::move(__init) + *__first1 * *__first2; -#else - __init = __init + *__first1 * *__first2; -#endif - return __init; -} - -template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, - _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) -{ - for (; __first1 != __last1; ++__first1, (void) ++__first2) -#if _LIBCPP_STD_VER > 17 - __init = __binary_op1(_VSTD::move(__init), __binary_op2(*__first1, *__first2)); -#else - __init = __binary_op1(__init, __binary_op2(*__first1, *__first2)); -#endif - return __init; -} - -#if _LIBCPP_STD_VER > 14 -template <class _InputIterator, class _Tp, class _BinaryOp, class _UnaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -transform_reduce(_InputIterator __first, _InputIterator __last, - _Tp __init, _BinaryOp __b, _UnaryOp __u) -{ - for (; __first != __last; ++__first) - __init = __b(__init, __u(*__first)); - return __init; -} - -template <class _InputIterator1, class _InputIterator2, - class _Tp, class _BinaryOp1, class _BinaryOp2> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _Tp __init, _BinaryOp1 __b1, _BinaryOp2 __b2) -{ - for (; __first1 != __last1; ++__first1, (void) ++__first2) - __init = __b1(__init, __b2(*__first1, *__first2)); - return __init; -} - -template <class _InputIterator1, class _InputIterator2, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_Tp -transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1, - _InputIterator2 __first2, _Tp __init) -{ - return _VSTD::transform_reduce(__first1, __last1, __first2, _VSTD::move(__init), - _VSTD::plus<>(), _VSTD::multiplies<>()); -} -#endif - -template <class _InputIterator, class _OutputIterator> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result) -{ - if (__first != __last) - { - typename iterator_traits<_InputIterator>::value_type __t(*__first); - *__result = __t; - for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) - { -#if _LIBCPP_STD_VER > 17 - __t = _VSTD::move(__t) + *__first; -#else - __t = __t + *__first; -#endif - *__result = __t; - } - } - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _BinaryOperation> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result, - _BinaryOperation __binary_op) -{ - if (__first != __last) - { - typename iterator_traits<_InputIterator>::value_type __t(*__first); - *__result = __t; - for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) - { -#if _LIBCPP_STD_VER > 17 - __t = __binary_op(_VSTD::move(__t), *__first); -#else - __t = __binary_op(__t, *__first); -#endif - *__result = __t; - } - } - return __result; -} - -#if _LIBCPP_STD_VER > 14 -template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -exclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _Tp __init, _BinaryOp __b) -{ - if (__first != 
__last) - { - _Tp __tmp(__b(__init, *__first)); - while (true) - { - *__result = _VSTD::move(__init); - ++__result; - ++__first; - if (__first == __last) - break; - __init = _VSTD::move(__tmp); - __tmp = __b(__init, *__first); - } - } - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -exclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _Tp __init) -{ - return _VSTD::exclusive_scan(__first, __last, __result, __init, _VSTD::plus<>()); -} - -template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _BinaryOp __b, _Tp __init) -{ - for (; __first != __last; ++__first, (void) ++__result) { - __init = __b(__init, *__first); - *__result = __init; - } - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _BinaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _BinaryOp __b) -{ - if (__first != __last) { - typename iterator_traits<_InputIterator>::value_type __init = *__first; - *__result++ = __init; - if (++__first != __last) - return _VSTD::inclusive_scan(__first, __last, __result, __b, __init); - } - - return __result; -} - -template <class _InputIterator, class _OutputIterator> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result) -{ - return _VSTD::inclusive_scan(__first, __last, __result, _VSTD::plus<>()); -} - -template <class _InputIterator, class _OutputIterator, class _Tp, - class _BinaryOp, class _UnaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -transform_exclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _Tp __init, - _BinaryOp __b, _UnaryOp __u) -{ - if (__first != __last) - { - _Tp __saved = __init; - do - { - __init = __b(__init, __u(*__first)); - *__result = __saved; - __saved = __init; - ++__result; - } while (++__first != __last); - } - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp, class _UnaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -transform_inclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init) -{ - for (; __first != __last; ++__first, (void) ++__result) { - __init = __b(__init, __u(*__first)); - *__result = __init; - } - - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _UnaryOp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -transform_inclusive_scan(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _BinaryOp __b, _UnaryOp __u) -{ - if (__first != __last) { - typename iterator_traits<_InputIterator>::value_type __init = __u(*__first); - *__result++ = __init; - if (++__first != __last) - return _VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init); - } - - return __result; -} -#endif - -template <class _InputIterator, class _OutputIterator> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator 
-adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result) -{ - if (__first != __last) - { - typename iterator_traits<_InputIterator>::value_type __acc(*__first); - *__result = __acc; - for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) - { - typename iterator_traits<_InputIterator>::value_type __val(*__first); -#if _LIBCPP_STD_VER > 17 - *__result = __val - _VSTD::move(__acc); -#else - *__result = __val - __acc; -#endif - __acc = _VSTD::move(__val); - } - } - return __result; -} - -template <class _InputIterator, class _OutputIterator, class _BinaryOperation> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -_OutputIterator -adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result, - _BinaryOperation __binary_op) -{ - if (__first != __last) - { - typename iterator_traits<_InputIterator>::value_type __acc(*__first); - *__result = __acc; - for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result) - { - typename iterator_traits<_InputIterator>::value_type __val(*__first); -#if _LIBCPP_STD_VER > 17 - *__result = __binary_op(__val, _VSTD::move(__acc)); -#else - *__result = __binary_op(__val, __acc); -#endif - __acc = _VSTD::move(__val); - } - } - return __result; -} - -template <class _ForwardIterator, class _Tp> -_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 -void -iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_) -{ - for (; __first != __last; ++__first, (void) ++__value_) - *__first = __value_; -} - - -#if _LIBCPP_STD_VER > 14 -template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value> struct __ct_abs; - -template <typename _Result, typename _Source> -struct __ct_abs<_Result, _Source, true> { - _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY - _Result operator()(_Source __t) const noexcept - { - if (__t >= 0) return __t; - if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t); - return -__t; - } -}; - -template <typename _Result, typename _Source> -struct __ct_abs<_Result, _Source, false> { - _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY - _Result operator()(_Source __t) const noexcept { return __t; } -}; - - -template<class _Tp> -_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN -_Tp __gcd(_Tp __m, _Tp __n) -{ - static_assert((!is_signed<_Tp>::value), ""); - return __n == 0 ? 
__m : _VSTD::__gcd<_Tp>(__n, __m % __n); -} - - -template<class _Tp, class _Up> -_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY -common_type_t<_Tp,_Up> -gcd(_Tp __m, _Up __n) -{ - static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to gcd must be integer types"); - static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to gcd cannot be bool" ); - static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" ); - using _Rp = common_type_t<_Tp,_Up>; - using _Wp = make_unsigned_t<_Rp>; - return static_cast<_Rp>(_VSTD::__gcd( - static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)), - static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n)))); -} - -template<class _Tp, class _Up> -_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY -common_type_t<_Tp,_Up> -lcm(_Tp __m, _Up __n) -{ - static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to lcm must be integer types"); - static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to lcm cannot be bool" ); - static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to lcm cannot be bool" ); - if (__m == 0 || __n == 0) - return 0; - - using _Rp = common_type_t<_Tp,_Up>; - _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / _VSTD::gcd(__m, __n); - _Rp __val2 = __ct_abs<_Rp, _Up>()(__n); - _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm"); - return __val1 * __val2; -} - -#endif /* _LIBCPP_STD_VER > 14 */ - -#if _LIBCPP_STD_VER > 17 -template <class _Tp> -_LIBCPP_INLINE_VISIBILITY constexpr -enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp> -midpoint(_Tp __a, _Tp __b) noexcept -_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK -{ - using _Up = make_unsigned_t<_Tp>; - constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1; - - _Up __diff = _Up(__b) - _Up(__a); - _Up __sign_bit = __b < __a; - - _Up __half_diff = (__diff / 2) + (__sign_bit << __bitshift) + (__sign_bit & __diff); - - return __a + __half_diff; -} - - -template <class _TPtr> -_LIBCPP_INLINE_VISIBILITY constexpr -enable_if_t<is_pointer_v<_TPtr> - && is_object_v<remove_pointer_t<_TPtr>> - && ! is_void_v<remove_pointer_t<_TPtr>> - && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr> -midpoint(_TPtr __a, _TPtr __b) noexcept -{ - return __a + _VSTD::midpoint(ptrdiff_t(0), __b - __a); -} - - -template <typename _Tp> -constexpr int __sign(_Tp __val) { - return (_Tp(0) < __val) - (__val < _Tp(0)); -} - -template <typename _Fp> -constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; } - -template <class _Fp> -_LIBCPP_INLINE_VISIBILITY constexpr -enable_if_t<is_floating_point_v<_Fp>, _Fp> -midpoint(_Fp __a, _Fp __b) noexcept -{ - constexpr _Fp __lo = numeric_limits<_Fp>::min()*2; - constexpr _Fp __hi = numeric_limits<_Fp>::max()/2; - return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ? // typical case: overflow is impossible - (__a + __b)/2 : // always correctly rounded - __fp_abs(__a) < __lo ? __a + __b/2 : // not safe to halve a - __fp_abs(__b) < __lo ? 
__a/2 + __b : // not safe to halve b - __a/2 + __b/2; // otherwise correctly rounded -} - -#endif // _LIBCPP_STD_VER > 17 - -_LIBCPP_END_NAMESPACE_STD - -_LIBCPP_POP_MACROS - #if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17 # include <__pstl_numeric> #endif diff --git a/libcxx/include/random b/libcxx/include/random index 72d9855765f8..9eb70bac00b9 100644 --- a/libcxx/include/random +++ b/libcxx/include/random @@ -1678,5330 +1678,56 @@ class piecewise_linear_distribution */ #include <__config> +#include <__random/bernoulli_distribution.h> +#include <__random/binomial_distribution.h> +#include <__random/cauchy_distribution.h> +#include <__random/chi_squared_distribution.h> +#include <__random/default_random_engine.h> +#include <__random/discard_block_engine.h> +#include <__random/discrete_distribution.h> +#include <__random/exponential_distribution.h> +#include <__random/extreme_value_distribution.h> +#include <__random/fisher_f_distribution.h> +#include <__random/gamma_distribution.h> +#include <__random/generate_canonical.h> +#include <__random/geometric_distribution.h> +#include <__random/independent_bits_engine.h> +#include <__random/is_seed_sequence.h> +#include <__random/knuth_b.h> +#include <__random/linear_congruential_engine.h> +#include <__random/log2.h> +#include <__random/lognormal_distribution.h> +#include <__random/mersenne_twister_engine.h> +#include <__random/negative_binomial_distribution.h> +#include <__random/normal_distribution.h> +#include <__random/piecewise_constant_distribution.h> +#include <__random/piecewise_linear_distribution.h> +#include <__random/poisson_distribution.h> +#include <__random/random_device.h> +#include <__random/ranlux.h> +#include <__random/seed_seq.h> +#include <__random/shuffle_order_engine.h> +#include <__random/student_t_distribution.h> +#include <__random/subtract_with_carry_engine.h> #include <__random/uniform_int_distribution.h> -#include <algorithm> -#include <cmath> -#include <concepts> -#include <cstddef> -#include <cstdint> +#include <__random/uniform_random_bit_generator.h> +#include <__random/uniform_real_distribution.h> +#include <__random/weibull_distribution.h> #include <initializer_list> -#include <iosfwd> -#include <limits> -#include <numeric> -#include <string> -#include <type_traits> -#include <vector> + +#include <algorithm> // for backward compatibility; TODO remove it +#include <cmath> // for backward compatibility; TODO remove it +#include <cstddef> // for backward compatibility; TODO remove it +#include <cstdint> // for backward compatibility; TODO remove it +#include <iosfwd> // for backward compatibility; TODO remove it +#include <limits> // for backward compatibility; TODO remove it +#include <numeric> // for backward compatibility; TODO remove it +#include <string> // for backward compatibility; TODO remove it +#include <type_traits> // for backward compatibility; TODO remove it +#include <vector> // for backward compatibility; TODO remove it #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - - -_LIBCPP_BEGIN_NAMESPACE_STD - -#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) - -// [rand.req.urng] -template<class _Gen> -concept uniform_random_bit_generator = - invocable<_Gen&> && unsigned_integral<invoke_result_t<_Gen&>> && - requires { - { _Gen::min() } -> same_as<invoke_result_t<_Gen&>>; - { _Gen::max() } -> same_as<invoke_result_t<_Gen&>>; - requires bool_constant<(_Gen::min() < _Gen::max())>::value; - 
}; - -#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS) - -// __is_seed_sequence - -template <class _Sseq, class _Engine> -struct __is_seed_sequence -{ - static _LIBCPP_CONSTEXPR const bool value = - !is_convertible<_Sseq, typename _Engine::result_type>::value && - !is_same<typename remove_cv<_Sseq>::type, _Engine>::value; -}; - -// linear_congruential_engine - -template <unsigned long long __a, unsigned long long __c, - unsigned long long __m, unsigned long long _Mp, - bool _MightOverflow = (__a != 0 && __m != 0 && __m-1 > (_Mp-__c)/__a), - bool _OverflowOK = ((__m | (__m-1)) > __m), // m = 2^n - bool _SchrageOK = (__a != 0 && __m != 0 && __m % __a <= __m / __a)> // r <= q -struct __lce_alg_picker -{ - static_assert(__a != 0 || __m != 0 || !_MightOverflow || _OverflowOK || _SchrageOK, - "The current values of a, c, and m cannot generate a number " - "within bounds of linear_congruential_engine."); - - static _LIBCPP_CONSTEXPR const bool __use_schrage = _MightOverflow && - !_OverflowOK && - _SchrageOK; -}; - -template <unsigned long long __a, unsigned long long __c, - unsigned long long __m, unsigned long long _Mp, - bool _UseSchrage = __lce_alg_picker<__a, __c, __m, _Mp>::__use_schrage> -struct __lce_ta; - -// 64 - -template <unsigned long long __a, unsigned long long __c, unsigned long long __m> -struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), true> -{ - typedef unsigned long long result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - // Schrage's algorithm - const result_type __q = __m / __a; - const result_type __r = __m % __a; - const result_type __t0 = __a * (__x % __q); - const result_type __t1 = __r * (__x / __q); - __x = __t0 + (__t0 < __t1) * __m - __t1; - __x += __c - (__x >= __m - __c) * __m; - return __x; - } -}; - -template <unsigned long long __a, unsigned long long __m> -struct __lce_ta<__a, 0, __m, (unsigned long long)(~0), true> -{ - typedef unsigned long long result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - // Schrage's algorithm - const result_type __q = __m / __a; - const result_type __r = __m % __a; - const result_type __t0 = __a * (__x % __q); - const result_type __t1 = __r * (__x / __q); - __x = __t0 + (__t0 < __t1) * __m - __t1; - return __x; - } -}; - -template <unsigned long long __a, unsigned long long __c, unsigned long long __m> -struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), false> -{ - typedef unsigned long long result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - return (__a * __x + __c) % __m; - } -}; - -template <unsigned long long __a, unsigned long long __c> -struct __lce_ta<__a, __c, 0, (unsigned long long)(~0), false> -{ - typedef unsigned long long result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - return __a * __x + __c; - } -}; - -// 32 - -template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp> -struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), true> -{ - typedef unsigned result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - const result_type __a = static_cast<result_type>(_Ap); - const result_type __c = static_cast<result_type>(_Cp); - const result_type __m = static_cast<result_type>(_Mp); - // Schrage's algorithm - const result_type __q = __m / __a; - const result_type __r = __m % __a; - const result_type __t0 = __a * (__x % __q); - const result_type __t1 = __r * (__x / __q); - __x = __t0 + (__t0 < __t1) 
* __m - __t1; - __x += __c - (__x >= __m - __c) * __m; - return __x; - } -}; - -template <unsigned long long _Ap, unsigned long long _Mp> -struct __lce_ta<_Ap, 0, _Mp, unsigned(~0), true> -{ - typedef unsigned result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - const result_type __a = static_cast<result_type>(_Ap); - const result_type __m = static_cast<result_type>(_Mp); - // Schrage's algorithm - const result_type __q = __m / __a; - const result_type __r = __m % __a; - const result_type __t0 = __a * (__x % __q); - const result_type __t1 = __r * (__x / __q); - __x = __t0 + (__t0 < __t1) * __m - __t1; - return __x; - } -}; - -template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp> -struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), false> -{ - typedef unsigned result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - const result_type __a = static_cast<result_type>(_Ap); - const result_type __c = static_cast<result_type>(_Cp); - const result_type __m = static_cast<result_type>(_Mp); - return (__a * __x + __c) % __m; - } -}; - -template <unsigned long long _Ap, unsigned long long _Cp> -struct __lce_ta<_Ap, _Cp, 0, unsigned(~0), false> -{ - typedef unsigned result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - const result_type __a = static_cast<result_type>(_Ap); - const result_type __c = static_cast<result_type>(_Cp); - return __a * __x + __c; - } -}; - -// 16 - -template <unsigned long long __a, unsigned long long __c, unsigned long long __m, bool __b> -struct __lce_ta<__a, __c, __m, (unsigned short)(~0), __b> -{ - typedef unsigned short result_type; - _LIBCPP_INLINE_VISIBILITY - static result_type next(result_type __x) - { - return static_cast<result_type>(__lce_ta<__a, __c, __m, unsigned(~0)>::next(__x)); - } -}; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -class _LIBCPP_TEMPLATE_VIS linear_congruential_engine; - -template <class _CharT, class _Traits, - class _Up, _Up _Ap, _Up _Cp, _Up _Np> -_LIBCPP_INLINE_VISIBILITY -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&); - -template <class _CharT, class _Traits, - class _Up, _Up _Ap, _Up _Cp, _Up _Np> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x); - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -class _LIBCPP_TEMPLATE_VIS linear_congruential_engine -{ -public: - // types - typedef _UIntType result_type; - -private: - result_type __x_; - - static _LIBCPP_CONSTEXPR const result_type _Mp = result_type(~0); - - static_assert(__m == 0 || __a < __m, "linear_congruential_engine invalid parameters"); - static_assert(__m == 0 || __c < __m, "linear_congruential_engine invalid parameters"); - static_assert(is_unsigned<_UIntType>::value, "_UIntType must be unsigned type"); -public: - static _LIBCPP_CONSTEXPR const result_type _Min = __c == 0u ? 
1u: 0u; - static _LIBCPP_CONSTEXPR const result_type _Max = __m - 1u; - static_assert(_Min < _Max, "linear_congruential_engine invalid parameters"); - - // engine characteristics - static _LIBCPP_CONSTEXPR const result_type multiplier = __a; - static _LIBCPP_CONSTEXPR const result_type increment = __c; - static _LIBCPP_CONSTEXPR const result_type modulus = __m; - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() {return _Min;} - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() {return _Max;} - static _LIBCPP_CONSTEXPR const result_type default_seed = 1u; - - // constructors and seeding functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - linear_congruential_engine() : linear_congruential_engine(default_seed) {} - _LIBCPP_INLINE_VISIBILITY - explicit linear_congruential_engine(result_type __s) { seed(__s); } -#else - _LIBCPP_INLINE_VISIBILITY - explicit linear_congruential_engine(result_type __s = default_seed) { - seed(__s); - } -#endif - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit linear_congruential_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, linear_congruential_engine>::value>::type* = 0) - {seed(__q);} - _LIBCPP_INLINE_VISIBILITY - void seed(result_type __s = default_seed) - {seed(integral_constant<bool, __m == 0>(), - integral_constant<bool, __c == 0>(), __s);} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, linear_congruential_engine>::value, - void - >::type - seed(_Sseq& __q) - {__seed(__q, integral_constant<unsigned, - 1 + (__m == 0 ? (sizeof(result_type) * __CHAR_BIT__ - 1)/32 - : (__m > 0x100000000ull))>());} - - // generating functions - _LIBCPP_INLINE_VISIBILITY - result_type operator()() - {return __x_ = static_cast<result_type>(__lce_ta<__a, __c, __m, _Mp>::next(__x_));} - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const linear_congruential_engine& __x, - const linear_congruential_engine& __y) - {return __x.__x_ == __y.__x_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const linear_congruential_engine& __x, - const linear_congruential_engine& __y) - {return !(__x == __y);} - -private: - - _LIBCPP_INLINE_VISIBILITY - void seed(true_type, true_type, result_type __s) {__x_ = __s == 0 ? 1 : __s;} - _LIBCPP_INLINE_VISIBILITY - void seed(true_type, false_type, result_type __s) {__x_ = __s;} - _LIBCPP_INLINE_VISIBILITY - void seed(false_type, true_type, result_type __s) {__x_ = __s % __m == 0 ? 
- 1 : __s % __m;} - _LIBCPP_INLINE_VISIBILITY - void seed(false_type, false_type, result_type __s) {__x_ = __s % __m;} - - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 1>); - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 2>); - - template <class _CharT, class _Traits, - class _Up, _Up _Ap, _Up _Cp, _Up _Np> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&); - - template <class _CharT, class _Traits, - class _Up, _Up _Ap, _Up _Cp, _Up _Np> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x); -}; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> - _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type - linear_congruential_engine<_UIntType, __a, __c, __m>::multiplier; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> - _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type - linear_congruential_engine<_UIntType, __a, __c, __m>::increment; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> - _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type - linear_congruential_engine<_UIntType, __a, __c, __m>::modulus; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> - _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type - linear_congruential_engine<_UIntType, __a, __c, __m>::default_seed; - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -template<class _Sseq> -void -linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q, - integral_constant<unsigned, 1>) -{ - const unsigned __k = 1; - uint32_t __ar[__k+3]; - __q.generate(__ar, __ar + __k + 3); - result_type __s = static_cast<result_type>(__ar[3] % __m); - __x_ = __c == 0 && __s == 0 ? result_type(1) : __s; -} - -template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -template<class _Sseq> -void -linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q, - integral_constant<unsigned, 2>) -{ - const unsigned __k = 2; - uint32_t __ar[__k+3]; - __q.generate(__ar, __ar + __k + 3); - result_type __s = static_cast<result_type>((__ar[3] + - ((uint64_t)__ar[4] << 32)) % __m); - __x_ = __c == 0 && __s == 0 ? 
result_type(1) : __s; -} - -template <class _CharT, class _Traits, - class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -inline _LIBCPP_INLINE_VISIBILITY -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const linear_congruential_engine<_UIntType, __a, __c, __m>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _Ostream; - __os.flags(_Ostream::dec | _Ostream::left); - __os.fill(__os.widen(' ')); - return __os << __x.__x_; -} - -template <class _CharT, class _Traits, - class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - linear_congruential_engine<_UIntType, __a, __c, __m>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - _UIntType __t; - __is >> __t; - if (!__is.fail()) - __x.__x_ = __t; - return __is; -} - -typedef linear_congruential_engine<uint_fast32_t, 16807, 0, 2147483647> - minstd_rand0; -typedef linear_congruential_engine<uint_fast32_t, 48271, 0, 2147483647> - minstd_rand; -typedef minstd_rand default_random_engine; -// mersenne_twister_engine - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine; - -template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -bool -operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - -template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -_LIBCPP_INLINE_VISIBILITY -bool -operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x); - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x); - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine -{ -public: - // types - typedef _UIntType result_type; - -private: - result_type __x_[__n]; - size_t 
__i_; - - static_assert( 0 < __m, "mersenne_twister_engine invalid parameters"); - static_assert(__m <= __n, "mersenne_twister_engine invalid parameters"); - static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; - static_assert(__w <= _Dt, "mersenne_twister_engine invalid parameters"); - static_assert( 2 <= __w, "mersenne_twister_engine invalid parameters"); - static_assert(__r <= __w, "mersenne_twister_engine invalid parameters"); - static_assert(__u <= __w, "mersenne_twister_engine invalid parameters"); - static_assert(__s <= __w, "mersenne_twister_engine invalid parameters"); - static_assert(__t <= __w, "mersenne_twister_engine invalid parameters"); - static_assert(__l <= __w, "mersenne_twister_engine invalid parameters"); -public: - static _LIBCPP_CONSTEXPR const result_type _Min = 0; - static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) : - (result_type(1) << __w) - result_type(1); - static_assert(_Min < _Max, "mersenne_twister_engine invalid parameters"); - static_assert(__a <= _Max, "mersenne_twister_engine invalid parameters"); - static_assert(__b <= _Max, "mersenne_twister_engine invalid parameters"); - static_assert(__c <= _Max, "mersenne_twister_engine invalid parameters"); - static_assert(__d <= _Max, "mersenne_twister_engine invalid parameters"); - static_assert(__f <= _Max, "mersenne_twister_engine invalid parameters"); - - // engine characteristics - static _LIBCPP_CONSTEXPR const size_t word_size = __w; - static _LIBCPP_CONSTEXPR const size_t state_size = __n; - static _LIBCPP_CONSTEXPR const size_t shift_size = __m; - static _LIBCPP_CONSTEXPR const size_t mask_bits = __r; - static _LIBCPP_CONSTEXPR const result_type xor_mask = __a; - static _LIBCPP_CONSTEXPR const size_t tempering_u = __u; - static _LIBCPP_CONSTEXPR const result_type tempering_d = __d; - static _LIBCPP_CONSTEXPR const size_t tempering_s = __s; - static _LIBCPP_CONSTEXPR const result_type tempering_b = __b; - static _LIBCPP_CONSTEXPR const size_t tempering_t = __t; - static _LIBCPP_CONSTEXPR const result_type tempering_c = __c; - static _LIBCPP_CONSTEXPR const size_t tempering_l = __l; - static _LIBCPP_CONSTEXPR const result_type initialization_multiplier = __f; - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Max; } - static _LIBCPP_CONSTEXPR const result_type default_seed = 5489u; - - // constructors and seeding functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - mersenne_twister_engine() : mersenne_twister_engine(default_seed) {} - _LIBCPP_INLINE_VISIBILITY - explicit mersenne_twister_engine(result_type __sd) { seed(__sd); } -#else - _LIBCPP_INLINE_VISIBILITY - explicit mersenne_twister_engine(result_type __sd = default_seed) { - seed(__sd); - } -#endif - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit mersenne_twister_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, mersenne_twister_engine>::value>::type* = 0) - {seed(__q);} - void seed(result_type __sd = default_seed); - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, mersenne_twister_engine>::value, - void - >::type - seed(_Sseq& __q) - {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());} - - // generating functions - result_type operator()(); - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - template 
<class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> - friend - bool - operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - - template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> - friend - bool - operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y); - - template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x); - - template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x); -private: - - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 1>); - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 2>); - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - __count < __w, - result_type - >::type - __lshift(result_type __x) {return (__x << __count) & _Max;} - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - (__count >= __w), - result_type - >::type - __lshift(result_type) {return result_type(0);} - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - __count < _Dt, - result_type - >::type - __rshift(result_type __x) {return __x >> __count;} - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - (__count >= _Dt), - result_type - >::type - __rshift(result_type) {return result_type(0);} -}; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::word_size; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::state_size; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR 
const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::shift_size; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::mask_bits; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::xor_mask; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_u; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_d; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_s; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_b; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_t; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_c; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - 
_UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const size_t - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_l; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::initialization_multiplier; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> - _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type - mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::default_seed; - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -void -mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, - __t, __c, __l, __f>::seed(result_type __sd) - _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK -{ // __w >= 2 - __x_[0] = __sd & _Max; - for (size_t __i = 1; __i < __n; ++__i) - __x_[__i] = (__f * (__x_[__i-1] ^ __rshift<__w - 2>(__x_[__i-1])) + __i) & _Max; - __i_ = 0; -} - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -template<class _Sseq> -void -mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, - __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 1>) -{ - const unsigned __k = 1; - uint32_t __ar[__n * __k]; - __q.generate(__ar, __ar + __n * __k); - for (size_t __i = 0; __i < __n; ++__i) - __x_[__i] = static_cast<result_type>(__ar[__i] & _Max); - const result_type __mask = __r == _Dt ? result_type(~0) : - (result_type(1) << __r) - result_type(1); - __i_ = 0; - if ((__x_[0] & ~__mask) == 0) - { - for (size_t __i = 1; __i < __n; ++__i) - if (__x_[__i] != 0) - return; - __x_[0] = result_type(1) << (__w - 1); - } -} - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -template<class _Sseq> -void -mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, - __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 2>) -{ - const unsigned __k = 2; - uint32_t __ar[__n * __k]; - __q.generate(__ar, __ar + __n * __k); - for (size_t __i = 0; __i < __n; ++__i) - __x_[__i] = static_cast<result_type>( - (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); - const result_type __mask = __r == _Dt ? 
result_type(~0) : - (result_type(1) << __r) - result_type(1); - __i_ = 0; - if ((__x_[0] & ~__mask) == 0) - { - for (size_t __i = 1; __i < __n; ++__i) - if (__x_[__i] != 0) - return; - __x_[0] = result_type(1) << (__w - 1); - } -} - -template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r, - _UIntType __a, size_t __u, _UIntType __d, size_t __s, - _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f> -_UIntType -mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, - __t, __c, __l, __f>::operator()() -{ - const size_t __j = (__i_ + 1) % __n; - const result_type __mask = __r == _Dt ? result_type(~0) : - (result_type(1) << __r) - result_type(1); - const result_type _Yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); - const size_t __k = (__i_ + __m) % __n; - __x_[__i_] = __x_[__k] ^ __rshift<1>(_Yp) ^ (__a * (_Yp & 1)); - result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d); - __i_ = __j; - __z ^= __lshift<__s>(__z) & __b; - __z ^= __lshift<__t>(__z) & __c; - return __z ^ __rshift<__l>(__z); -} - -template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -bool -operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y) -{ - if (__x.__i_ == __y.__i_) - return _VSTD::equal(__x.__x_, __x.__x_ + _Np, __y.__x_); - if (__x.__i_ == 0 || __y.__i_ == 0) - { - size_t __j = _VSTD::min(_Np - __x.__i_, _Np - __y.__i_); - if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j, - __y.__x_ + __y.__i_)) - return false; - if (__x.__i_ == 0) - return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Np, __y.__x_); - return _VSTD::equal(__x.__x_, __x.__x_ + (_Np - __j), __y.__x_ + __j); - } - if (__x.__i_ < __y.__i_) - { - size_t __j = _Np - __y.__i_; - if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j), - __y.__x_ + __y.__i_)) - return false; - if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Np, - __y.__x_)) - return false; - return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_, - __y.__x_ + (_Np - (__x.__i_ + __j))); - } - size_t __j = _Np - __x.__i_; - if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j), - __x.__x_ + __x.__i_)) - return false; - if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Np, - __x.__x_)) - return false; - return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_, - __x.__x_ + (_Np - (__y.__i_ + __j))); -} - -template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __y) -{ - return !(__x == __y); -} - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x) -{ - 
__save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _Ostream; - __os.flags(_Ostream::dec | _Ostream::left); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.__x_[__x.__i_]; - for (size_t __j = __x.__i_ + 1; __j < _Np; ++__j) - __os << __sp << __x.__x_[__j]; - for (size_t __j = 0; __j < __x.__i_; ++__j) - __os << __sp << __x.__x_[__j]; - return __os; -} - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp, - _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp, - _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp, - _Bp, _Tp, _Cp, _Lp, _Fp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - _UInt __t[_Np]; - for (size_t __i = 0; __i < _Np; ++__i) - __is >> __t[__i]; - if (!__is.fail()) - { - for (size_t __i = 0; __i < _Np; ++__i) - __x.__x_[__i] = __t[__i]; - __x.__i_ = 0; - } - return __is; -} - -typedef mersenne_twister_engine<uint_fast32_t, 32, 624, 397, 31, - 0x9908b0df, 11, 0xffffffff, - 7, 0x9d2c5680, - 15, 0xefc60000, - 18, 1812433253> mt19937; -typedef mersenne_twister_engine<uint_fast64_t, 64, 312, 156, 31, - 0xb5026f5aa96619e9ULL, 29, 0x5555555555555555ULL, - 17, 0x71d67fffeda60000ULL, - 37, 0xfff7eee000000000ULL, - 43, 6364136223846793005ULL> mt19937_64; - -// subtract_with_carry_engine - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine; - -template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -bool -operator==( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); - -template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -_LIBCPP_INLINE_VISIBILITY -bool -operator!=( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine -{ -public: - // types - typedef _UIntType result_type; - -private: - result_type __x_[__r]; - result_type __c_; - size_t __i_; - - static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; - static_assert( 0 < __w, "subtract_with_carry_engine invalid parameters"); - static_assert(__w <= _Dt, "subtract_with_carry_engine invalid parameters"); - static_assert( 0 < __s, "subtract_with_carry_engine invalid parameters"); - static_assert(__s < __r, "subtract_with_carry_engine invalid parameters"); -public: - static _LIBCPP_CONSTEXPR const result_type _Min = 0; - static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? 
result_type(~0) : - (result_type(1) << __w) - result_type(1); - static_assert(_Min < _Max, "subtract_with_carry_engine invalid parameters"); - - // engine characteristics - static _LIBCPP_CONSTEXPR const size_t word_size = __w; - static _LIBCPP_CONSTEXPR const size_t short_lag = __s; - static _LIBCPP_CONSTEXPR const size_t long_lag = __r; - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Max; } - static _LIBCPP_CONSTEXPR const result_type default_seed = 19780503u; - - // constructors and seeding functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - subtract_with_carry_engine() : subtract_with_carry_engine(default_seed) {} - _LIBCPP_INLINE_VISIBILITY - explicit subtract_with_carry_engine(result_type __sd) { seed(__sd); } -#else - _LIBCPP_INLINE_VISIBILITY - explicit subtract_with_carry_engine(result_type __sd = default_seed) { - seed(__sd); - } -#endif - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit subtract_with_carry_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, subtract_with_carry_engine>::value>::type* = 0) - {seed(__q);} - _LIBCPP_INLINE_VISIBILITY - void seed(result_type __sd = default_seed) - {seed(__sd, integral_constant<unsigned, 1 + (__w - 1) / 32>());} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, subtract_with_carry_engine>::value, - void - >::type - seed(_Sseq& __q) - {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());} - - // generating functions - result_type operator()(); - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> - friend - bool - operator==( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); - - template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> - friend - bool - operator!=( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y); - - template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); - - template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x); - -private: - - void seed(result_type __sd, integral_constant<unsigned, 1>); - void seed(result_type __sd, integral_constant<unsigned, 2>); - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 1>); - template<class _Sseq> - void __seed(_Sseq& __q, integral_constant<unsigned, 2>); -}; - -template<class _UIntType, size_t __w, size_t __s, size_t __r> - _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::word_size; - -template<class _UIntType, size_t __w, size_t __s, size_t __r> - _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::short_lag; - -template<class _UIntType, size_t __w, size_t __s, size_t __r> - _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::long_lag; - -template<class _UIntType, size_t __w, size_t __s, size_t __r> 
- _LIBCPP_CONSTEXPR const typename subtract_with_carry_engine<_UIntType, __w, __s, __r>::result_type - subtract_with_carry_engine<_UIntType, __w, __s, __r>::default_seed; - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -void -subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd, - integral_constant<unsigned, 1>) -{ - linear_congruential_engine<result_type, 40014u, 0u, 2147483563u> - __e(__sd == 0u ? default_seed : __sd); - for (size_t __i = 0; __i < __r; ++__i) - __x_[__i] = static_cast<result_type>(__e() & _Max); - __c_ = __x_[__r-1] == 0; - __i_ = 0; -} - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -void -subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd, - integral_constant<unsigned, 2>) -{ - linear_congruential_engine<result_type, 40014u, 0u, 2147483563u> - __e(__sd == 0u ? default_seed : __sd); - for (size_t __i = 0; __i < __r; ++__i) - { - result_type __e0 = __e(); - __x_[__i] = static_cast<result_type>( - (__e0 + ((uint64_t)__e() << 32)) & _Max); - } - __c_ = __x_[__r-1] == 0; - __i_ = 0; -} - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -template<class _Sseq> -void -subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q, - integral_constant<unsigned, 1>) -{ - const unsigned __k = 1; - uint32_t __ar[__r * __k]; - __q.generate(__ar, __ar + __r * __k); - for (size_t __i = 0; __i < __r; ++__i) - __x_[__i] = static_cast<result_type>(__ar[__i] & _Max); - __c_ = __x_[__r-1] == 0; - __i_ = 0; -} - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -template<class _Sseq> -void -subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q, - integral_constant<unsigned, 2>) -{ - const unsigned __k = 2; - uint32_t __ar[__r * __k]; - __q.generate(__ar, __ar + __r * __k); - for (size_t __i = 0; __i < __r; ++__i) - __x_[__i] = static_cast<result_type>( - (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max); - __c_ = __x_[__r-1] == 0; - __i_ = 0; -} - -template<class _UIntType, size_t __w, size_t __s, size_t __r> -_UIntType -subtract_with_carry_engine<_UIntType, __w, __s, __r>::operator()() -{ - const result_type& __xs = __x_[(__i_ + (__r - __s)) % __r]; - result_type& __xr = __x_[__i_]; - result_type __new_c = __c_ == 0 ? __xs < __xr : __xs != 0 ? 
__xs <= __xr : 1; - __xr = (__xs - __xr - __c_) & _Max; - __c_ = __new_c; - __i_ = (__i_ + 1) % __r; - return __xr; -} - -template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -bool -operator==( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y) -{ - if (__x.__c_ != __y.__c_) - return false; - if (__x.__i_ == __y.__i_) - return _VSTD::equal(__x.__x_, __x.__x_ + _Rp, __y.__x_); - if (__x.__i_ == 0 || __y.__i_ == 0) - { - size_t __j = _VSTD::min(_Rp - __x.__i_, _Rp - __y.__i_); - if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j, - __y.__x_ + __y.__i_)) - return false; - if (__x.__i_ == 0) - return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Rp, __y.__x_); - return _VSTD::equal(__x.__x_, __x.__x_ + (_Rp - __j), __y.__x_ + __j); - } - if (__x.__i_ < __y.__i_) - { - size_t __j = _Rp - __y.__i_; - if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j), - __y.__x_ + __y.__i_)) - return false; - if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Rp, - __y.__x_)) - return false; - return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_, - __y.__x_ + (_Rp - (__x.__i_ + __j))); - } - size_t __j = _Rp - __x.__i_; - if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j), - __x.__x_ + __x.__i_)) - return false; - if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Rp, - __x.__x_)) - return false; - return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_, - __x.__x_ + (_Rp - (__y.__i_ + __j))); -} - -template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=( - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y) -{ - return !(__x == __y); -} - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _Ostream; - __os.flags(_Ostream::dec | _Ostream::left); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.__x_[__x.__i_]; - for (size_t __j = __x.__i_ + 1; __j < _Rp; ++__j) - __os << __sp << __x.__x_[__j]; - for (size_t __j = 0; __j < __x.__i_; ++__j) - __os << __sp << __x.__x_[__j]; - __os << __sp << __x.__c_; - return __os; -} - -template <class _CharT, class _Traits, - class _UInt, size_t _Wp, size_t _Sp, size_t _Rp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - _UInt __t[_Rp+1]; - for (size_t __i = 0; __i < _Rp+1; ++__i) - __is >> __t[__i]; - if (!__is.fail()) - { - for (size_t __i = 0; __i < _Rp; ++__i) - __x.__x_[__i] = __t[__i]; - __x.__c_ = __t[_Rp]; - __x.__i_ = 0; - } - return __is; -} - -typedef subtract_with_carry_engine<uint_fast32_t, 24, 10, 24> ranlux24_base; -typedef subtract_with_carry_engine<uint_fast64_t, 48, 5, 12> ranlux48_base; - -// discard_block_engine - -template<class _Engine, size_t __p, size_t __r> -class _LIBCPP_TEMPLATE_VIS discard_block_engine -{ - _Engine __e_; - int __n_; - - static_assert( 0 < __r, "discard_block_engine invalid parameters"); - static_assert(__r <= __p, "discard_block_engine invalid parameters"); - 
static_assert(__r <= INT_MAX, "discard_block_engine invalid parameters"); -public: - // types - typedef typename _Engine::result_type result_type; - - // engine characteristics - static _LIBCPP_CONSTEXPR const size_t block_size = __p; - static _LIBCPP_CONSTEXPR const size_t used_block = __r; - -#ifdef _LIBCPP_CXX03_LANG - static const result_type _Min = _Engine::_Min; - static const result_type _Max = _Engine::_Max; -#else - static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min(); - static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max(); -#endif - - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); } - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); } - - // constructors and seeding functions - _LIBCPP_INLINE_VISIBILITY - discard_block_engine() : __n_(0) {} - _LIBCPP_INLINE_VISIBILITY - explicit discard_block_engine(const _Engine& __e) - : __e_(__e), __n_(0) {} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit discard_block_engine(_Engine&& __e) - : __e_(_VSTD::move(__e)), __n_(0) {} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit discard_block_engine(result_type __sd) : __e_(__sd), __n_(0) {} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit discard_block_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, discard_block_engine>::value && - !is_convertible<_Sseq, _Engine>::value>::type* = 0) - : __e_(__q), __n_(0) {} - _LIBCPP_INLINE_VISIBILITY - void seed() {__e_.seed(); __n_ = 0;} - _LIBCPP_INLINE_VISIBILITY - void seed(result_type __sd) {__e_.seed(__sd); __n_ = 0;} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, discard_block_engine>::value, - void - >::type - seed(_Sseq& __q) {__e_.seed(__q); __n_ = 0;} - - // generating functions - result_type operator()(); - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - // property functions - _LIBCPP_INLINE_VISIBILITY - const _Engine& base() const _NOEXCEPT {return __e_;} - - template<class _Eng, size_t _Pp, size_t _Rp> - friend - bool - operator==( - const discard_block_engine<_Eng, _Pp, _Rp>& __x, - const discard_block_engine<_Eng, _Pp, _Rp>& __y); - - template<class _Eng, size_t _Pp, size_t _Rp> - friend - bool - operator!=( - const discard_block_engine<_Eng, _Pp, _Rp>& __x, - const discard_block_engine<_Eng, _Pp, _Rp>& __y); - - template <class _CharT, class _Traits, - class _Eng, size_t _Pp, size_t _Rp> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const discard_block_engine<_Eng, _Pp, _Rp>& __x); - - template <class _CharT, class _Traits, - class _Eng, size_t _Pp, size_t _Rp> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - discard_block_engine<_Eng, _Pp, _Rp>& __x); -}; - -template<class _Engine, size_t __p, size_t __r> - _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::block_size; - -template<class _Engine, size_t __p, size_t __r> - _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::used_block; - -template<class _Engine, size_t __p, size_t __r> -typename discard_block_engine<_Engine, __p, __r>::result_type -discard_block_engine<_Engine, __p, __r>::operator()() -{ - if (__n_ >= static_cast<int>(__r)) - { - __e_.discard(__p - __r); - __n_ = 0; - } - ++__n_; - return __e_(); -} - -template<class _Eng, size_t _Pp, 
size_t _Rp> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator==(const discard_block_engine<_Eng, _Pp, _Rp>& __x, - const discard_block_engine<_Eng, _Pp, _Rp>& __y) -{ - return __x.__n_ == __y.__n_ && __x.__e_ == __y.__e_; -} - -template<class _Eng, size_t _Pp, size_t _Rp> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=(const discard_block_engine<_Eng, _Pp, _Rp>& __x, - const discard_block_engine<_Eng, _Pp, _Rp>& __y) -{ - return !(__x == __y); -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Pp, size_t _Rp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const discard_block_engine<_Eng, _Pp, _Rp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _Ostream; - __os.flags(_Ostream::dec | _Ostream::left); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - return __os << __x.__e_ << __sp << __x.__n_; -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Pp, size_t _Rp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - discard_block_engine<_Eng, _Pp, _Rp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - _Eng __e; - int __n; - __is >> __e >> __n; - if (!__is.fail()) - { - __x.__e_ = __e; - __x.__n_ = __n; - } - return __is; -} - -typedef discard_block_engine<ranlux24_base, 223, 23> ranlux24; -typedef discard_block_engine<ranlux48_base, 389, 11> ranlux48; - -// independent_bits_engine - -template<class _Engine, size_t __w, class _UIntType> -class _LIBCPP_TEMPLATE_VIS independent_bits_engine -{ - template <class _UInt, _UInt _R0, size_t _Wp, size_t _Mp> - class __get_n - { - static _LIBCPP_CONSTEXPR const size_t _Dt = numeric_limits<_UInt>::digits; - static _LIBCPP_CONSTEXPR const size_t _Np = _Wp / _Mp + (_Wp % _Mp != 0); - static _LIBCPP_CONSTEXPR const size_t _W0 = _Wp / _Np; - static _LIBCPP_CONSTEXPR const _UInt _Y0 = _W0 >= _Dt ? 0 : (_R0 >> _W0) << _W0; - public: - static _LIBCPP_CONSTEXPR const size_t value = _R0 - _Y0 > _Y0 / _Np ? _Np + 1 : _Np; - }; -public: - // types - typedef _UIntType result_type; - -private: - _Engine __e_; - - static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits; - static_assert( 0 < __w, "independent_bits_engine invalid parameters"); - static_assert(__w <= _Dt, "independent_bits_engine invalid parameters"); - - typedef typename _Engine::result_type _Engine_result_type; - typedef typename conditional - < - sizeof(_Engine_result_type) <= sizeof(result_type), - result_type, - _Engine_result_type - >::type _Working_result_type; -#ifdef _LIBCPP_CXX03_LANG - static const _Working_result_type _Rp = _Engine::_Max - _Engine::_Min - + _Working_result_type(1); -#else - static _LIBCPP_CONSTEXPR const _Working_result_type _Rp = _Engine::max() - _Engine::min() - + _Working_result_type(1); -#endif - static _LIBCPP_CONSTEXPR const size_t __m = __log2<_Working_result_type, _Rp>::value; - static _LIBCPP_CONSTEXPR const size_t __n = __get_n<_Working_result_type, _Rp, __w, __m>::value; - static _LIBCPP_CONSTEXPR const size_t __w0 = __w / __n; - static _LIBCPP_CONSTEXPR const size_t __n0 = __n - __w % __n; - static _LIBCPP_CONSTEXPR const size_t _WDt = numeric_limits<_Working_result_type>::digits; - static _LIBCPP_CONSTEXPR const size_t _EDt = numeric_limits<_Engine_result_type>::digits; - static _LIBCPP_CONSTEXPR const _Working_result_type __y0 = __w0 >= _WDt ? 
0 : - (_Rp >> __w0) << __w0; - static _LIBCPP_CONSTEXPR const _Working_result_type __y1 = __w0 >= _WDt - 1 ? 0 : - (_Rp >> (__w0+1)) << (__w0+1); - static _LIBCPP_CONSTEXPR const _Engine_result_type __mask0 = __w0 > 0 ? - _Engine_result_type(~0) >> (_EDt - __w0) : - _Engine_result_type(0); - static _LIBCPP_CONSTEXPR const _Engine_result_type __mask1 = __w0 < _EDt - 1 ? - _Engine_result_type(~0) >> (_EDt - (__w0 + 1)) : - _Engine_result_type(~0); -public: - static _LIBCPP_CONSTEXPR const result_type _Min = 0; - static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) : - (result_type(1) << __w) - result_type(1); - static_assert(_Min < _Max, "independent_bits_engine invalid parameters"); - - // engine characteristics - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Max; } - - // constructors and seeding functions - _LIBCPP_INLINE_VISIBILITY - independent_bits_engine() {} - _LIBCPP_INLINE_VISIBILITY - explicit independent_bits_engine(const _Engine& __e) - : __e_(__e) {} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit independent_bits_engine(_Engine&& __e) - : __e_(_VSTD::move(__e)) {} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit independent_bits_engine(result_type __sd) : __e_(__sd) {} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit independent_bits_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, independent_bits_engine>::value && - !is_convertible<_Sseq, _Engine>::value>::type* = 0) - : __e_(__q) {} - _LIBCPP_INLINE_VISIBILITY - void seed() {__e_.seed();} - _LIBCPP_INLINE_VISIBILITY - void seed(result_type __sd) {__e_.seed(__sd);} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, independent_bits_engine>::value, - void - >::type - seed(_Sseq& __q) {__e_.seed(__q);} - - // generating functions - _LIBCPP_INLINE_VISIBILITY - result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());} - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - // property functions - _LIBCPP_INLINE_VISIBILITY - const _Engine& base() const _NOEXCEPT {return __e_;} - - template<class _Eng, size_t _Wp, class _UInt> - friend - bool - operator==( - const independent_bits_engine<_Eng, _Wp, _UInt>& __x, - const independent_bits_engine<_Eng, _Wp, _UInt>& __y); - - template<class _Eng, size_t _Wp, class _UInt> - friend - bool - operator!=( - const independent_bits_engine<_Eng, _Wp, _UInt>& __x, - const independent_bits_engine<_Eng, _Wp, _UInt>& __y); - - template <class _CharT, class _Traits, - class _Eng, size_t _Wp, class _UInt> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const independent_bits_engine<_Eng, _Wp, _UInt>& __x); - - template <class _CharT, class _Traits, - class _Eng, size_t _Wp, class _UInt> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - independent_bits_engine<_Eng, _Wp, _UInt>& __x); - -private: - _LIBCPP_INLINE_VISIBILITY - result_type __eval(false_type); - result_type __eval(true_type); - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - __count < _Dt, - result_type - >::type - __lshift(result_type __x) {return __x << __count;} - - template <size_t __count> - _LIBCPP_INLINE_VISIBILITY - static - typename enable_if - < - 
(__count >= _Dt), - result_type - >::type - __lshift(result_type) {return result_type(0);} -}; - -template<class _Engine, size_t __w, class _UIntType> -inline -_UIntType -independent_bits_engine<_Engine, __w, _UIntType>::__eval(false_type) -{ - return static_cast<result_type>(__e_() & __mask0); -} - -template<class _Engine, size_t __w, class _UIntType> -_UIntType -independent_bits_engine<_Engine, __w, _UIntType>::__eval(true_type) -{ - result_type _Sp = 0; - for (size_t __k = 0; __k < __n0; ++__k) - { - _Engine_result_type __u; - do - { - __u = __e_() - _Engine::min(); - } while (__u >= __y0); - _Sp = static_cast<result_type>(__lshift<__w0>(_Sp) + (__u & __mask0)); - } - for (size_t __k = __n0; __k < __n; ++__k) - { - _Engine_result_type __u; - do - { - __u = __e_() - _Engine::min(); - } while (__u >= __y1); - _Sp = static_cast<result_type>(__lshift<__w0+1>(_Sp) + (__u & __mask1)); - } - return _Sp; -} - -template<class _Eng, size_t _Wp, class _UInt> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator==( - const independent_bits_engine<_Eng, _Wp, _UInt>& __x, - const independent_bits_engine<_Eng, _Wp, _UInt>& __y) -{ - return __x.base() == __y.base(); -} - -template<class _Eng, size_t _Wp, class _UInt> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=( - const independent_bits_engine<_Eng, _Wp, _UInt>& __x, - const independent_bits_engine<_Eng, _Wp, _UInt>& __y) -{ - return !(__x == __y); -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Wp, class _UInt> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const independent_bits_engine<_Eng, _Wp, _UInt>& __x) -{ - return __os << __x.base(); -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Wp, class _UInt> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - independent_bits_engine<_Eng, _Wp, _UInt>& __x) -{ - _Eng __e; - __is >> __e; - if (!__is.fail()) - __x.__e_ = __e; - return __is; -} - -// shuffle_order_engine - -template <uint64_t _Xp, uint64_t _Yp> -struct __ugcd -{ - static _LIBCPP_CONSTEXPR const uint64_t value = __ugcd<_Yp, _Xp % _Yp>::value; -}; - -template <uint64_t _Xp> -struct __ugcd<_Xp, 0> -{ - static _LIBCPP_CONSTEXPR const uint64_t value = _Xp; -}; - -template <uint64_t _Np, uint64_t _Dp> -class __uratio -{ - static_assert(_Dp != 0, "__uratio divide by 0"); - static _LIBCPP_CONSTEXPR const uint64_t __gcd = __ugcd<_Np, _Dp>::value; -public: - static _LIBCPP_CONSTEXPR const uint64_t num = _Np / __gcd; - static _LIBCPP_CONSTEXPR const uint64_t den = _Dp / __gcd; - - typedef __uratio<num, den> type; -}; - -template<class _Engine, size_t __k> -class _LIBCPP_TEMPLATE_VIS shuffle_order_engine -{ - static_assert(0 < __k, "shuffle_order_engine invalid parameters"); -public: - // types - typedef typename _Engine::result_type result_type; - -private: - _Engine __e_; - result_type _V_[__k]; - result_type _Y_; - -public: - // engine characteristics - static _LIBCPP_CONSTEXPR const size_t table_size = __k; - -#ifdef _LIBCPP_CXX03_LANG - static const result_type _Min = _Engine::_Min; - static const result_type _Max = _Engine::_Max; -#else - static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min(); - static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max(); -#endif - static_assert(_Min < _Max, "shuffle_order_engine invalid parameters"); - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Max; } - - static 
_LIBCPP_CONSTEXPR const unsigned long long _Rp = _Max - _Min + 1ull; - - // constructors and seeding functions - _LIBCPP_INLINE_VISIBILITY - shuffle_order_engine() {__init();} - _LIBCPP_INLINE_VISIBILITY - explicit shuffle_order_engine(const _Engine& __e) - : __e_(__e) {__init();} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit shuffle_order_engine(_Engine&& __e) - : __e_(_VSTD::move(__e)) {__init();} -#endif // _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - explicit shuffle_order_engine(result_type __sd) : __e_(__sd) {__init();} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - explicit shuffle_order_engine(_Sseq& __q, - typename enable_if<__is_seed_sequence<_Sseq, shuffle_order_engine>::value && - !is_convertible<_Sseq, _Engine>::value>::type* = 0) - : __e_(__q) {__init();} - _LIBCPP_INLINE_VISIBILITY - void seed() {__e_.seed(); __init();} - _LIBCPP_INLINE_VISIBILITY - void seed(result_type __sd) {__e_.seed(__sd); __init();} - template<class _Sseq> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __is_seed_sequence<_Sseq, shuffle_order_engine>::value, - void - >::type - seed(_Sseq& __q) {__e_.seed(__q); __init();} - - // generating functions - _LIBCPP_INLINE_VISIBILITY - result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());} - _LIBCPP_INLINE_VISIBILITY - void discard(unsigned long long __z) {for (; __z; --__z) operator()();} - - // property functions - _LIBCPP_INLINE_VISIBILITY - const _Engine& base() const _NOEXCEPT {return __e_;} - -private: - template<class _Eng, size_t _Kp> - friend - bool - operator==( - const shuffle_order_engine<_Eng, _Kp>& __x, - const shuffle_order_engine<_Eng, _Kp>& __y); - - template<class _Eng, size_t _Kp> - friend - bool - operator!=( - const shuffle_order_engine<_Eng, _Kp>& __x, - const shuffle_order_engine<_Eng, _Kp>& __y); - - template <class _CharT, class _Traits, - class _Eng, size_t _Kp> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const shuffle_order_engine<_Eng, _Kp>& __x); - - template <class _CharT, class _Traits, - class _Eng, size_t _Kp> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - shuffle_order_engine<_Eng, _Kp>& __x); - - _LIBCPP_INLINE_VISIBILITY - void __init() - { - for (size_t __i = 0; __i < __k; ++__i) - _V_[__i] = __e_(); - _Y_ = __e_(); - } - - _LIBCPP_INLINE_VISIBILITY - result_type __eval(false_type) {return __eval2(integral_constant<bool, __k & 1>());} - _LIBCPP_INLINE_VISIBILITY - result_type __eval(true_type) {return __eval(__uratio<__k, _Rp>());} - - _LIBCPP_INLINE_VISIBILITY - result_type __eval2(false_type) {return __eval(__uratio<__k/2, 0x8000000000000000ull>());} - _LIBCPP_INLINE_VISIBILITY - result_type __eval2(true_type) {return __evalf<__k, 0>();} - - template <uint64_t _Np, uint64_t _Dp> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - (__uratio<_Np, _Dp>::num > 0xFFFFFFFFFFFFFFFFull / (_Max - _Min)), - result_type - >::type - __eval(__uratio<_Np, _Dp>) - {return __evalf<__uratio<_Np, _Dp>::num, __uratio<_Np, _Dp>::den>();} - - template <uint64_t _Np, uint64_t _Dp> - _LIBCPP_INLINE_VISIBILITY - typename enable_if - < - __uratio<_Np, _Dp>::num <= 0xFFFFFFFFFFFFFFFFull / (_Max - _Min), - result_type - >::type - __eval(__uratio<_Np, _Dp>) - { - const size_t __j = static_cast<size_t>(__uratio<_Np, _Dp>::num * (_Y_ - _Min) - / __uratio<_Np, _Dp>::den); - _Y_ = _V_[__j]; - _V_[__j] = __e_(); - return _Y_; - } - - template <uint64_t __n, uint64_t __d> - 
_LIBCPP_INLINE_VISIBILITY - result_type __evalf() - { - const double _Fp = __d == 0 ? - __n / (2. * 0x8000000000000000ull) : - __n / (double)__d; - const size_t __j = static_cast<size_t>(_Fp * (_Y_ - _Min)); - _Y_ = _V_[__j]; - _V_[__j] = __e_(); - return _Y_; - } -}; - -template<class _Engine, size_t __k> - _LIBCPP_CONSTEXPR const size_t shuffle_order_engine<_Engine, __k>::table_size; - -template<class _Eng, size_t _Kp> -bool -operator==( - const shuffle_order_engine<_Eng, _Kp>& __x, - const shuffle_order_engine<_Eng, _Kp>& __y) -{ - return __x._Y_ == __y._Y_ && _VSTD::equal(__x._V_, __x._V_ + _Kp, __y._V_) && - __x.__e_ == __y.__e_; -} - -template<class _Eng, size_t _Kp> -inline _LIBCPP_INLINE_VISIBILITY -bool -operator!=( - const shuffle_order_engine<_Eng, _Kp>& __x, - const shuffle_order_engine<_Eng, _Kp>& __y) -{ - return !(__x == __y); -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Kp> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const shuffle_order_engine<_Eng, _Kp>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _Ostream; - __os.flags(_Ostream::dec | _Ostream::left); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.__e_ << __sp << __x._V_[0]; - for (size_t __i = 1; __i < _Kp; ++__i) - __os << __sp << __x._V_[__i]; - return __os << __sp << __x._Y_; -} - -template <class _CharT, class _Traits, - class _Eng, size_t _Kp> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - shuffle_order_engine<_Eng, _Kp>& __x) -{ - typedef typename shuffle_order_engine<_Eng, _Kp>::result_type result_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - _Eng __e; - result_type _Vp[_Kp+1]; - __is >> __e; - for (size_t __i = 0; __i < _Kp+1; ++__i) - __is >> _Vp[__i]; - if (!__is.fail()) - { - __x.__e_ = __e; - for (size_t __i = 0; __i < _Kp; ++__i) - __x._V_[__i] = _Vp[__i]; - __x._Y_ = _Vp[_Kp]; - } - return __is; -} - -typedef shuffle_order_engine<minstd_rand0, 256> knuth_b; - -// random_device - -#if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE) - -class _LIBCPP_TYPE_VIS random_device -{ -#ifdef _LIBCPP_USING_DEV_RANDOM - int __f_; -#endif // defined(_LIBCPP_USING_DEV_RANDOM) -public: - // types - typedef unsigned result_type; - - // generator characteristics - static _LIBCPP_CONSTEXPR const result_type _Min = 0; - static _LIBCPP_CONSTEXPR const result_type _Max = 0xFFFFFFFFu; - - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type min() { return _Min;} - _LIBCPP_INLINE_VISIBILITY - static _LIBCPP_CONSTEXPR result_type max() { return _Max;} - - // constructors -#ifndef _LIBCPP_CXX03_LANG - random_device() : random_device("/dev/urandom") {} - explicit random_device(const string& __token); -#else - explicit random_device(const string& __token = "/dev/urandom"); -#endif - ~random_device(); - - // generating functions - result_type operator()(); - - // property functions - double entropy() const _NOEXCEPT; - -private: - // no copy functions - random_device(const random_device&); // = delete; - random_device& operator=(const random_device&); // = delete; -}; - -#endif // !_LIBCPP_HAS_NO_RANDOM_DEVICE - -// seed_seq - -class _LIBCPP_TEMPLATE_VIS seed_seq -{ -public: - // types - typedef uint32_t result_type; - -private: - vector<result_type> __v_; - - template<class _InputIterator> - void init(_InputIterator __first, _InputIterator __last); -public: - // 
constructors - _LIBCPP_INLINE_VISIBILITY - seed_seq() _NOEXCEPT {} -#ifndef _LIBCPP_CXX03_LANG - template<class _Tp> - _LIBCPP_INLINE_VISIBILITY - seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());} -#endif // _LIBCPP_CXX03_LANG - - template<class _InputIterator> - _LIBCPP_INLINE_VISIBILITY - seed_seq(_InputIterator __first, _InputIterator __last) - {init(__first, __last);} - - // generating functions - template<class _RandomAccessIterator> - void generate(_RandomAccessIterator __first, _RandomAccessIterator __last); - - // property functions - _LIBCPP_INLINE_VISIBILITY - size_t size() const _NOEXCEPT {return __v_.size();} - template<class _OutputIterator> - _LIBCPP_INLINE_VISIBILITY - void param(_OutputIterator __dest) const - {_VSTD::copy(__v_.begin(), __v_.end(), __dest);} - -private: - // no copy functions - seed_seq(const seed_seq&); // = delete; - void operator=(const seed_seq&); // = delete; - - _LIBCPP_INLINE_VISIBILITY - static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);} -}; - -template<class _InputIterator> -void -seed_seq::init(_InputIterator __first, _InputIterator __last) -{ - for (_InputIterator __s = __first; __s != __last; ++__s) - __v_.push_back(*__s & 0xFFFFFFFF); -} - -template<class _RandomAccessIterator> -void -seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last) -{ - if (__first != __last) - { - _VSTD::fill(__first, __last, 0x8b8b8b8b); - const size_t __n = static_cast<size_t>(__last - __first); - const size_t __s = __v_.size(); - const size_t __t = (__n >= 623) ? 11 - : (__n >= 68) ? 7 - : (__n >= 39) ? 5 - : (__n >= 7) ? 3 - : (__n - 1) / 2; - const size_t __p = (__n - __t) / 2; - const size_t __q = __p + __t; - const size_t __m = _VSTD::max(__s + 1, __n); - // __k = 0; - { - result_type __r = 1664525 * _Tp(__first[0] ^ __first[__p] - ^ __first[__n - 1]); - __first[__p] += __r; - __r += __s; - __first[__q] += __r; - __first[0] = __r; - } - for (size_t __k = 1; __k <= __s; ++__k) - { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn + __v_[__k-1]; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; - } - for (size_t __k = __s + 1; __k < __m; ++__k) - { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn] - ^ __first[(__k - 1) % __n]); - __first[__kpmodn] += __r; - __r += __kmodn; - __first[(__k + __q) % __n] += __r; - __first[__kmodn] = __r; - } - for (size_t __k = __m; __k < __m + __n; ++__k) - { - const size_t __kmodn = __k % __n; - const size_t __kpmodn = (__k + __p) % __n; - result_type __r = 1566083941 * _Tp(__first[__kmodn] + - __first[__kpmodn] + - __first[(__k - 1) % __n]); - __first[__kpmodn] ^= __r; - __r -= __kmodn; - __first[(__k + __q) % __n] ^= __r; - __first[__kmodn] = __r; - } - } -} - -// generate_canonical - -template<class _RealType, size_t __bits, class _URNG> -_RealType -generate_canonical(_URNG& __g) -{ - const size_t _Dt = numeric_limits<_RealType>::digits; - const size_t __b = _Dt < __bits ? 
_Dt : __bits; -#ifdef _LIBCPP_CXX03_LANG - const size_t __logR = __log2<uint64_t, _URNG::_Max - _URNG::_Min + uint64_t(1)>::value; -#else - const size_t __logR = __log2<uint64_t, _URNG::max() - _URNG::min() + uint64_t(1)>::value; -#endif - const size_t __k = __b / __logR + (__b % __logR != 0) + (__b == 0); - const _RealType _Rp = static_cast<_RealType>(_URNG::max() - _URNG::min()) + _RealType(1); - _RealType __base = _Rp; - _RealType _Sp = __g() - _URNG::min(); - for (size_t __i = 1; __i < __k; ++__i, __base *= _Rp) - _Sp += (__g() - _URNG::min()) * __base; - return _Sp / __base; -} - -// uniform_real_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS uniform_real_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __a_; - result_type __b_; - public: - typedef uniform_real_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __a = 0, - result_type __b = 1) - : __a_(__a), __b_(__b) {} - - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __a_;} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __b_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - uniform_real_distribution() : uniform_real_distribution(0) {} - explicit uniform_real_distribution(result_type __a, result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit uniform_real_distribution(result_type __a = 0, result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit uniform_real_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __p_.a();} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __p_.b();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return a();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return b();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const uniform_real_distribution& __x, - const uniform_real_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const uniform_real_distribution& __x, - const uniform_real_distribution& __y) - {return !(__x == __y);} -}; - -template<class _RealType> -template<class _URNG> -inline -typename uniform_real_distribution<_RealType>::result_type -uniform_real_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - return (__p.b() - __p.a()) - * _VSTD::generate_canonical<_RealType, numeric_limits<_RealType>::digits>(__g) - + __p.a(); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& 
-operator<<(basic_ostream<_CharT, _Traits>& __os, - const uniform_real_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - return __os << __x.a() << __sp << __x.b(); -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - uniform_real_distribution<_RT>& __x) -{ - typedef uniform_real_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __a; - result_type __b; - __is >> __a >> __b; - if (!__is.fail()) - __x.param(param_type(__a, __b)); - return __is; -} - -// bernoulli_distribution - -class _LIBCPP_TEMPLATE_VIS bernoulli_distribution -{ -public: - // types - typedef bool result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - double __p_; - public: - typedef bernoulli_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(double __p = 0.5) : __p_(__p) {} - - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - bernoulli_distribution() : bernoulli_distribution(0.5) {} - _LIBCPP_INLINE_VISIBILITY - explicit bernoulli_distribution(double __p) : __p_(param_type(__p)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit bernoulli_distribution(double __p = 0.5) : __p_(param_type(__p)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit bernoulli_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_.p();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return false;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return true;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const bernoulli_distribution& __x, - const bernoulli_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const bernoulli_distribution& __x, - const bernoulli_distribution& __y) - {return !(__x == __y);} -}; - -template<class _URNG> -inline -bernoulli_distribution::result_type -bernoulli_distribution::operator()(_URNG& __g, const param_type& __p) -{ - uniform_real_distribution<double> __gen; - return __gen(__g) < __p.p(); -} - -template <class _CharT, class _Traits> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, const bernoulli_distribution& __x) -{ 
- __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - return __os << __x.p(); -} - -template <class _CharT, class _Traits> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, bernoulli_distribution& __x) -{ - typedef bernoulli_distribution _Eng; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - double __p; - __is >> __p; - if (!__is.fail()) - __x.param(param_type(__p)); - return __is; -} - -// binomial_distribution - -template<class _IntType = int> -class _LIBCPP_TEMPLATE_VIS binomial_distribution -{ -public: - // types - typedef _IntType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __t_; - double __p_; - double __pr_; - double __odds_ratio_; - result_type __r0_; - public: - typedef binomial_distribution distribution_type; - - explicit param_type(result_type __t = 1, double __p = 0.5); - - _LIBCPP_INLINE_VISIBILITY - result_type t() const {return __t_;} - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__t_ == __y.__t_ && __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - - friend class binomial_distribution; - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - binomial_distribution() : binomial_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit binomial_distribution(result_type __t, double __p = 0.5) - : __p_(param_type(__t, __p)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit binomial_distribution(result_type __t = 1, double __p = 0.5) - : __p_(param_type(__t, __p)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit binomial_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type t() const {return __p_.t();} - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_.p();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return t();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const binomial_distribution& __x, - const binomial_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const binomial_distribution& __x, - const binomial_distribution& __y) - {return !(__x == __y);} -}; - -#ifndef _LIBCPP_MSVCRT_LIKE -extern "C" double lgamma_r(double, int *); -#endif - -inline _LIBCPP_INLINE_VISIBILITY double __libcpp_lgamma(double __d) { -#if defined(_LIBCPP_MSVCRT_LIKE) - return lgamma(__d); -#else - int __sign; - return lgamma_r(__d, &__sign); -#endif -} - -template<class _IntType> 
-binomial_distribution<_IntType>::param_type::param_type(result_type __t, double __p) - : __t_(__t), __p_(__p) -{ - if (0 < __p_ && __p_ < 1) - { - __r0_ = static_cast<result_type>((__t_ + 1) * __p_); - __pr_ = _VSTD::exp(__libcpp_lgamma(__t_ + 1.) - - __libcpp_lgamma(__r0_ + 1.) - - __libcpp_lgamma(__t_ - __r0_ + 1.) + __r0_ * _VSTD::log(__p_) + - (__t_ - __r0_) * _VSTD::log(1 - __p_)); - __odds_ratio_ = __p_ / (1 - __p_); - } -} - -// Reference: Kemp, C.D. (1986). `A modal method for generating binomial -// variables', Commun. Statist. - Theor. Meth. 15(3), 805-813. -template<class _IntType> -template<class _URNG> -_IntType -binomial_distribution<_IntType>::operator()(_URNG& __g, const param_type& __pr) -{ - if (__pr.__t_ == 0 || __pr.__p_ == 0) - return 0; - if (__pr.__p_ == 1) - return __pr.__t_; - uniform_real_distribution<double> __gen; - double __u = __gen(__g) - __pr.__pr_; - if (__u < 0) - return __pr.__r0_; - double __pu = __pr.__pr_; - double __pd = __pu; - result_type __ru = __pr.__r0_; - result_type __rd = __ru; - while (true) - { - bool __break = true; - if (__rd >= 1) - { - __pd *= __rd / (__pr.__odds_ratio_ * (__pr.__t_ - __rd + 1)); - __u -= __pd; - __break = false; - if (__u < 0) - return __rd - 1; - } - if ( __rd != 0 ) - --__rd; - ++__ru; - if (__ru <= __pr.__t_) - { - __pu *= (__pr.__t_ - __ru + 1) * __pr.__odds_ratio_ / __ru; - __u -= __pu; - __break = false; - if (__u < 0) - return __ru; - } - if (__break) - return 0; - } -} - -template <class _CharT, class _Traits, class _IntType> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const binomial_distribution<_IntType>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - return __os << __x.t() << __sp << __x.p(); -} - -template <class _CharT, class _Traits, class _IntType> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - binomial_distribution<_IntType>& __x) -{ - typedef binomial_distribution<_IntType> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __t; - double __p; - __is >> __t >> __p; - if (!__is.fail()) - __x.param(param_type(__t, __p)); - return __is; -} - -// exponential_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS exponential_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __lambda_; - public: - typedef exponential_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {} - - _LIBCPP_INLINE_VISIBILITY - result_type lambda() const {return __lambda_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__lambda_ == __y.__lambda_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - exponential_distribution() : exponential_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit 
exponential_distribution(result_type __lambda) - : __p_(param_type(__lambda)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit exponential_distribution(result_type __lambda = 1) - : __p_(param_type(__lambda)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit exponential_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type lambda() const {return __p_.lambda();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const exponential_distribution& __x, - const exponential_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const exponential_distribution& __x, - const exponential_distribution& __y) - {return !(__x == __y);} -}; - -template <class _RealType> -template<class _URNG> -_RealType -exponential_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - return -_VSTD::log - ( - result_type(1) - - _VSTD::generate_canonical<result_type, - numeric_limits<result_type>::digits>(__g) - ) - / __p.lambda(); -} - -template <class _CharT, class _Traits, class _RealType> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const exponential_distribution<_RealType>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - return __os << __x.lambda(); -} - -template <class _CharT, class _Traits, class _RealType> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - exponential_distribution<_RealType>& __x) -{ - typedef exponential_distribution<_RealType> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __lambda; - __is >> __lambda; - if (!__is.fail()) - __x.param(param_type(__lambda)); - return __is; -} - -// normal_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS normal_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __mean_; - result_type __stddev_; - public: - typedef normal_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __mean = 0, result_type __stddev = 1) - : __mean_(__mean), __stddev_(__stddev) {} - - _LIBCPP_INLINE_VISIBILITY - result_type mean() const {return __mean_;} - _LIBCPP_INLINE_VISIBILITY - result_type stddev() const {return __stddev_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& 
__y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - result_type _V_; - bool _V_hot_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - normal_distribution() : normal_distribution(0) {} - _LIBCPP_INLINE_VISIBILITY - explicit normal_distribution(result_type __mean, result_type __stddev = 1) - : __p_(param_type(__mean, __stddev)), _V_hot_(false) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit normal_distribution(result_type __mean = 0, - result_type __stddev = 1) - : __p_(param_type(__mean, __stddev)), _V_hot_(false) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit normal_distribution(const param_type& __p) - : __p_(__p), _V_hot_(false) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {_V_hot_ = false;} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type mean() const {return __p_.mean();} - _LIBCPP_INLINE_VISIBILITY - result_type stddev() const {return __p_.stddev();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return -numeric_limits<result_type>::infinity();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const normal_distribution& __x, - const normal_distribution& __y) - {return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ && - (!__x._V_hot_ || __x._V_ == __y._V_);} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const normal_distribution& __x, - const normal_distribution& __y) - {return !(__x == __y);} - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const normal_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - normal_distribution<_RT>& __x); -}; - -template <class _RealType> -template<class _URNG> -_RealType -normal_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - result_type _Up; - if (_V_hot_) - { - _V_hot_ = false; - _Up = _V_; - } - else - { - uniform_real_distribution<result_type> _Uni(-1, 1); - result_type __u; - result_type __v; - result_type __s; - do - { - __u = _Uni(__g); - __v = _Uni(__g); - __s = __u * __u + __v * __v; - } while (__s > 1 || __s == 0); - result_type _Fp = _VSTD::sqrt(-2 * _VSTD::log(__s) / __s); - _V_ = __v * _Fp; - _V_hot_ = true; - _Up = __u * _Fp; - } - return _Up * __p.stddev() + __p.mean(); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const normal_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_; - if (__x._V_hot_) - __os << __sp << __x._V_; - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, 
_Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - normal_distribution<_RT>& __x) -{ - typedef normal_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __mean; - result_type __stddev; - result_type _Vp = 0; - bool _V_hot = false; - __is >> __mean >> __stddev >> _V_hot; - if (_V_hot) - __is >> _Vp; - if (!__is.fail()) - { - __x.param(param_type(__mean, __stddev)); - __x._V_hot_ = _V_hot; - __x._V_ = _Vp; - } - return __is; -} - -// lognormal_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS lognormal_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - normal_distribution<result_type> __nd_; - public: - typedef lognormal_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __m = 0, result_type __s = 1) - : __nd_(__m, __s) {} - - _LIBCPP_INLINE_VISIBILITY - result_type m() const {return __nd_.mean();} - _LIBCPP_INLINE_VISIBILITY - result_type s() const {return __nd_.stddev();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__nd_ == __y.__nd_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - friend class lognormal_distribution; - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const lognormal_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - lognormal_distribution<_RT>& __x); - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - lognormal_distribution() : lognormal_distribution(0) {} - _LIBCPP_INLINE_VISIBILITY - explicit lognormal_distribution(result_type __m, result_type __s = 1) - : __p_(param_type(__m, __s)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit lognormal_distribution(result_type __m = 0, - result_type __s = 1) - : __p_(param_type(__m, __s)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit lognormal_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {__p_.__nd_.reset();} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g, const param_type& __p) - {return _VSTD::exp(const_cast<normal_distribution<result_type>&>(__p.__nd_)(__g));} - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type m() const {return __p_.m();} - _LIBCPP_INLINE_VISIBILITY - result_type s() const {return __p_.s();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const lognormal_distribution& __x, - const lognormal_distribution& 
__y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const lognormal_distribution& __x, - const lognormal_distribution& __y) - {return !(__x == __y);} - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const lognormal_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - lognormal_distribution<_RT>& __x); -}; - -template <class _CharT, class _Traits, class _RT> -inline _LIBCPP_INLINE_VISIBILITY -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const lognormal_distribution<_RT>& __x) -{ - return __os << __x.__p_.__nd_; -} - -template <class _CharT, class _Traits, class _RT> -inline _LIBCPP_INLINE_VISIBILITY -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - lognormal_distribution<_RT>& __x) -{ - return __is >> __x.__p_.__nd_; -} - -// poisson_distribution - -template<class _IntType = int> -class _LIBCPP_TEMPLATE_VIS poisson_distribution -{ -public: - // types - typedef _IntType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - double __mean_; - double __s_; - double __d_; - double __l_; - double __omega_; - double __c0_; - double __c1_; - double __c2_; - double __c3_; - double __c_; - - public: - typedef poisson_distribution distribution_type; - - explicit param_type(double __mean = 1.0); - - _LIBCPP_INLINE_VISIBILITY - double mean() const {return __mean_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__mean_ == __y.__mean_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - - friend class poisson_distribution; - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - poisson_distribution() : poisson_distribution(1.0) {} - _LIBCPP_INLINE_VISIBILITY - explicit poisson_distribution(double __mean) - : __p_(__mean) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit poisson_distribution(double __mean = 1.0) - : __p_(__mean) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit poisson_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - double mean() const {return __p_.mean();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::max();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const poisson_distribution& __x, - const poisson_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const poisson_distribution& __x, - const poisson_distribution& __y) - {return !(__x == __y);} -}; - -template<class _IntType> -poisson_distribution<_IntType>::param_type::param_type(double __mean) - // According to the standard 
`inf` is a valid input, but it causes the - // distribution to hang, so we replace it with the maximum representable - // mean. - : __mean_(isinf(__mean) ? numeric_limits<double>::max() : __mean) -{ - if (__mean_ < 10) - { - __s_ = 0; - __d_ = 0; - __l_ = _VSTD::exp(-__mean_); - __omega_ = 0; - __c3_ = 0; - __c2_ = 0; - __c1_ = 0; - __c0_ = 0; - __c_ = 0; - } - else - { - __s_ = _VSTD::sqrt(__mean_); - __d_ = 6 * __mean_ * __mean_; - __l_ = _VSTD::trunc(__mean_ - 1.1484); - __omega_ = .3989423 / __s_; - double __b1_ = .4166667E-1 / __mean_; - double __b2_ = .3 * __b1_ * __b1_; - __c3_ = .1428571 * __b1_ * __b2_; - __c2_ = __b2_ - 15. * __c3_; - __c1_ = __b1_ - 6. * __b2_ + 45. * __c3_; - __c0_ = 1. - __b1_ + 3. * __b2_ - 15. * __c3_; - __c_ = .1069 / __mean_; - } -} - -template <class _IntType> -template<class _URNG> -_IntType -poisson_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr) -{ - double __tx; - uniform_real_distribution<double> __urd; - if (__pr.__mean_ < 10) - { - __tx = 0; - for (double __p = __urd(__urng); __p > __pr.__l_; ++__tx) - __p *= __urd(__urng); - } - else - { - double __difmuk; - double __g = __pr.__mean_ + __pr.__s_ * normal_distribution<double>()(__urng); - double __u; - if (__g > 0) - { - __tx = _VSTD::trunc(__g); - if (__tx >= __pr.__l_) - return _VSTD::__clamp_to_integral<result_type>(__tx); - __difmuk = __pr.__mean_ - __tx; - __u = __urd(__urng); - if (__pr.__d_ * __u >= __difmuk * __difmuk * __difmuk) - return _VSTD::__clamp_to_integral<result_type>(__tx); - } - exponential_distribution<double> __edist; - for (bool __using_exp_dist = false; true; __using_exp_dist = true) - { - double __e; - if (__using_exp_dist || __g <= 0) - { - double __t; - do - { - __e = __edist(__urng); - __u = __urd(__urng); - __u += __u - 1; - __t = 1.8 + (__u < 0 ? 
-__e : __e); - } while (__t <= -.6744); - __tx = _VSTD::trunc(__pr.__mean_ + __pr.__s_ * __t); - __difmuk = __pr.__mean_ - __tx; - __using_exp_dist = true; - } - double __px; - double __py; - if (__tx < 10 && __tx >= 0) - { - const double __fac[] = {1, 1, 2, 6, 24, 120, 720, 5040, - 40320, 362880}; - __px = -__pr.__mean_; - __py = _VSTD::pow(__pr.__mean_, (double)__tx) / __fac[static_cast<int>(__tx)]; - } - else - { - double __del = .8333333E-1 / __tx; - __del -= 4.8 * __del * __del * __del; - double __v = __difmuk / __tx; - if (_VSTD::abs(__v) > 0.25) - __px = __tx * _VSTD::log(1 + __v) - __difmuk - __del; - else - __px = __tx * __v * __v * (((((((.1250060 * __v + -.1384794) * - __v + .1421878) * __v + -.1661269) * __v + .2000118) * - __v + -.2500068) * __v + .3333333) * __v + -.5) - __del; - __py = .3989423 / _VSTD::sqrt(__tx); - } - double __r = (0.5 - __difmuk) / __pr.__s_; - double __r2 = __r * __r; - double __fx = -0.5 * __r2; - double __fy = __pr.__omega_ * (((__pr.__c3_ * __r2 + __pr.__c2_) * - __r2 + __pr.__c1_) * __r2 + __pr.__c0_); - if (__using_exp_dist) - { - if (__pr.__c_ * _VSTD::abs(__u) <= __py * _VSTD::exp(__px + __e) - - __fy * _VSTD::exp(__fx + __e)) - break; - } - else - { - if (__fy - __u * __fy <= __py * _VSTD::exp(__px - __fx)) - break; - } - } - } - return _VSTD::__clamp_to_integral<result_type>(__tx); -} - -template <class _CharT, class _Traits, class _IntType> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const poisson_distribution<_IntType>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - return __os << __x.mean(); -} - -template <class _CharT, class _Traits, class _IntType> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - poisson_distribution<_IntType>& __x) -{ - typedef poisson_distribution<_IntType> _Eng; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - double __mean; - __is >> __mean; - if (!__is.fail()) - __x.param(param_type(__mean)); - return __is; -} - -// weibull_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS weibull_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __a_; - result_type __b_; - public: - typedef weibull_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __a = 1, result_type __b = 1) - : __a_(__a), __b_(__b) {} - - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __a_;} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __b_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - weibull_distribution() : weibull_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit weibull_distribution(result_type __a, result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit weibull_distribution(result_type __a = 1, 
result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit weibull_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g, const param_type& __p) - {return __p.b() * - _VSTD::pow(exponential_distribution<result_type>()(__g), 1/__p.a());} - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __p_.a();} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __p_.b();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const weibull_distribution& __x, - const weibull_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const weibull_distribution& __x, - const weibull_distribution& __y) - {return !(__x == __y);} -}; - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const weibull_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.a() << __sp << __x.b(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - weibull_distribution<_RT>& __x) -{ - typedef weibull_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __a; - result_type __b; - __is >> __a >> __b; - if (!__is.fail()) - __x.param(param_type(__a, __b)); - return __is; -} - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS extreme_value_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __a_; - result_type __b_; - public: - typedef extreme_value_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __a = 0, result_type __b = 1) - : __a_(__a), __b_(__b) {} - - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __a_;} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __b_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - extreme_value_distribution() : extreme_value_distribution(0) {} - _LIBCPP_INLINE_VISIBILITY - explicit extreme_value_distribution(result_type __a, 
result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit extreme_value_distribution(result_type __a = 0, - result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit extreme_value_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __p_.a();} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __p_.b();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return -numeric_limits<result_type>::infinity();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const extreme_value_distribution& __x, - const extreme_value_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const extreme_value_distribution& __x, - const extreme_value_distribution& __y) - {return !(__x == __y);} -}; - -template<class _RealType> -template<class _URNG> -_RealType -extreme_value_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - return __p.a() - __p.b() * - _VSTD::log(-_VSTD::log(1-uniform_real_distribution<result_type>()(__g))); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const extreme_value_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.a() << __sp << __x.b(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - extreme_value_distribution<_RT>& __x) -{ - typedef extreme_value_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __a; - result_type __b; - __is >> __a >> __b; - if (!__is.fail()) - __x.param(param_type(__a, __b)); - return __is; -} - -// gamma_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS gamma_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __alpha_; - result_type __beta_; - public: - typedef gamma_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __alpha = 1, result_type __beta = 1) - : __alpha_(__alpha), __beta_(__beta) {} - - _LIBCPP_INLINE_VISIBILITY - result_type alpha() const {return __alpha_;} - _LIBCPP_INLINE_VISIBILITY - result_type beta() const {return __beta_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__alpha_ == __y.__alpha_ && 
__x.__beta_ == __y.__beta_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - gamma_distribution() : gamma_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit gamma_distribution(result_type __alpha, result_type __beta = 1) - : __p_(param_type(__alpha, __beta)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit gamma_distribution(result_type __alpha = 1, - result_type __beta = 1) - : __p_(param_type(__alpha, __beta)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit gamma_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type alpha() const {return __p_.alpha();} - _LIBCPP_INLINE_VISIBILITY - result_type beta() const {return __p_.beta();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const gamma_distribution& __x, - const gamma_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const gamma_distribution& __x, - const gamma_distribution& __y) - {return !(__x == __y);} -}; - -template <class _RealType> -template<class _URNG> -_RealType -gamma_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - result_type __a = __p.alpha(); - uniform_real_distribution<result_type> __gen(0, 1); - exponential_distribution<result_type> __egen; - result_type __x; - if (__a == 1) - __x = __egen(__g); - else if (__a > 1) - { - const result_type __b = __a - 1; - const result_type __c = 3 * __a - result_type(0.75); - while (true) - { - const result_type __u = __gen(__g); - const result_type __v = __gen(__g); - const result_type __w = __u * (1 - __u); - if (__w != 0) - { - const result_type __y = _VSTD::sqrt(__c / __w) * - (__u - result_type(0.5)); - __x = __b + __y; - if (__x >= 0) - { - const result_type __z = 64 * __w * __w * __w * __v * __v; - if (__z <= 1 - 2 * __y * __y / __x) - break; - if (_VSTD::log(__z) <= 2 * (__b * _VSTD::log(__x / __b) - __y)) - break; - } - } - } - } - else // __a < 1 - { - while (true) - { - const result_type __u = __gen(__g); - const result_type __es = __egen(__g); - if (__u <= 1 - __a) - { - __x = _VSTD::pow(__u, 1 / __a); - if (__x <= __es) - break; - } - else - { - const result_type __e = -_VSTD::log((1-__u)/__a); - __x = _VSTD::pow(1 - __a + __a * __e, 1 / __a); - if (__x <= __e + __es) - break; - } - } - } - return __x * __p.beta(); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const gamma_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - 
__os.fill(__sp); - __os << __x.alpha() << __sp << __x.beta(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - gamma_distribution<_RT>& __x) -{ - typedef gamma_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __alpha; - result_type __beta; - __is >> __alpha >> __beta; - if (!__is.fail()) - __x.param(param_type(__alpha, __beta)); - return __is; -} - -// negative_binomial_distribution - -template<class _IntType = int> -class _LIBCPP_TEMPLATE_VIS negative_binomial_distribution -{ -public: - // types - typedef _IntType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __k_; - double __p_; - public: - typedef negative_binomial_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __k = 1, double __p = 0.5) - : __k_(__k), __p_(__p) {} - - _LIBCPP_INLINE_VISIBILITY - result_type k() const {return __k_;} - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__k_ == __y.__k_ && __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - negative_binomial_distribution() : negative_binomial_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit negative_binomial_distribution(result_type __k, double __p = 0.5) - : __p_(__k, __p) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit negative_binomial_distribution(result_type __k = 1, - double __p = 0.5) - : __p_(__k, __p) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit negative_binomial_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type k() const {return __p_.k();} - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_.p();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::max();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const negative_binomial_distribution& __x, - const negative_binomial_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const negative_binomial_distribution& __x, - const negative_binomial_distribution& __y) - {return !(__x == __y);} -}; - -template <class _IntType> -template<class _URNG> -_IntType -negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr) -{ - result_type __k = __pr.k(); - double __p = __pr.p(); - if (__k <= 21 * __p) - { - bernoulli_distribution __gen(__p); - result_type __f = 
0; - result_type __s = 0; - while (__s < __k) - { - if (__gen(__urng)) - ++__s; - else - ++__f; - } - return __f; - } - return poisson_distribution<result_type>(gamma_distribution<double> - (__k, (1-__p)/__p)(__urng))(__urng); -} - -template <class _CharT, class _Traits, class _IntType> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const negative_binomial_distribution<_IntType>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - return __os << __x.k() << __sp << __x.p(); -} - -template <class _CharT, class _Traits, class _IntType> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - negative_binomial_distribution<_IntType>& __x) -{ - typedef negative_binomial_distribution<_IntType> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __k; - double __p; - __is >> __k >> __p; - if (!__is.fail()) - __x.param(param_type(__k, __p)); - return __is; -} - -// geometric_distribution - -template<class _IntType = int> -class _LIBCPP_TEMPLATE_VIS geometric_distribution -{ -public: - // types - typedef _IntType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - double __p_; - public: - typedef geometric_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(double __p = 0.5) : __p_(__p) {} - - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructors and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - geometric_distribution() : geometric_distribution(0.5) {} - _LIBCPP_INLINE_VISIBILITY - explicit geometric_distribution(double __p) - : __p_(__p) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit geometric_distribution(double __p = 0.5) - : __p_(__p) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit geometric_distribution(const param_type& __p) : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g, const param_type& __p) - {return negative_binomial_distribution<result_type>(1, __p.p())(__g);} - - // property functions - _LIBCPP_INLINE_VISIBILITY - double p() const {return __p_.p();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::max();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const geometric_distribution& __x, - const geometric_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const 
geometric_distribution& __x, - const geometric_distribution& __y) - {return !(__x == __y);} -}; - -template <class _CharT, class _Traits, class _IntType> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const geometric_distribution<_IntType>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - return __os << __x.p(); -} - -template <class _CharT, class _Traits, class _IntType> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - geometric_distribution<_IntType>& __x) -{ - typedef geometric_distribution<_IntType> _Eng; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - double __p; - __is >> __p; - if (!__is.fail()) - __x.param(param_type(__p)); - return __is; -} - -// chi_squared_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS chi_squared_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __n_; - public: - typedef chi_squared_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __n = 1) : __n_(__n) {} - - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __n_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__n_ == __y.__n_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - chi_squared_distribution() : chi_squared_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit chi_squared_distribution(result_type __n) - : __p_(param_type(__n)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit chi_squared_distribution(result_type __n = 1) - : __p_(param_type(__n)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit chi_squared_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g, const param_type& __p) - {return gamma_distribution<result_type>(__p.n() / 2, 2)(__g);} - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __p_.n();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const chi_squared_distribution& __x, - const chi_squared_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const chi_squared_distribution& __x, - const chi_squared_distribution& __y) - {return !(__x == __y);} -}; - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const 
chi_squared_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - __os << __x.n(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - chi_squared_distribution<_RT>& __x) -{ - typedef chi_squared_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __n; - __is >> __n; - if (!__is.fail()) - __x.param(param_type(__n)); - return __is; -} - -// cauchy_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS cauchy_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __a_; - result_type __b_; - public: - typedef cauchy_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __a = 0, result_type __b = 1) - : __a_(__a), __b_(__b) {} - - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __a_;} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __b_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - cauchy_distribution() : cauchy_distribution(0) {} - _LIBCPP_INLINE_VISIBILITY - explicit cauchy_distribution(result_type __a, result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit cauchy_distribution(result_type __a = 0, result_type __b = 1) - : __p_(param_type(__a, __b)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit cauchy_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type a() const {return __p_.a();} - _LIBCPP_INLINE_VISIBILITY - result_type b() const {return __p_.b();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return -numeric_limits<result_type>::infinity();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const cauchy_distribution& __x, - const cauchy_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const cauchy_distribution& __x, - const cauchy_distribution& __y) - {return !(__x == __y);} -}; - -template <class _RealType> -template<class _URNG> -inline -_RealType -cauchy_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - 
uniform_real_distribution<result_type> __gen; - // purposefully let tan arg get as close to pi/2 as it wants, tan will return a finite - return __p.a() + __p.b() * _VSTD::tan(3.1415926535897932384626433832795 * __gen(__g)); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const cauchy_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.a() << __sp << __x.b(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - cauchy_distribution<_RT>& __x) -{ - typedef cauchy_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __a; - result_type __b; - __is >> __a >> __b; - if (!__is.fail()) - __x.param(param_type(__a, __b)); - return __is; -} - -// fisher_f_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS fisher_f_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __m_; - result_type __n_; - public: - typedef fisher_f_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __m = 1, result_type __n = 1) - : __m_(__m), __n_(__n) {} - - _LIBCPP_INLINE_VISIBILITY - result_type m() const {return __m_;} - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __n_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__m_ == __y.__m_ && __x.__n_ == __y.__n_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - fisher_f_distribution() : fisher_f_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit fisher_f_distribution(result_type __m, result_type __n = 1) - : __p_(param_type(__m, __n)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit fisher_f_distribution(result_type __m = 1, result_type __n = 1) - : __p_(param_type(__m, __n)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit fisher_f_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type m() const {return __p_.m();} - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __p_.n();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const 
fisher_f_distribution& __x, - const fisher_f_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const fisher_f_distribution& __x, - const fisher_f_distribution& __y) - {return !(__x == __y);} -}; - -template <class _RealType> -template<class _URNG> -_RealType -fisher_f_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - gamma_distribution<result_type> __gdm(__p.m() * result_type(.5)); - gamma_distribution<result_type> __gdn(__p.n() * result_type(.5)); - return __p.n() * __gdm(__g) / (__p.m() * __gdn(__g)); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const fisher_f_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - __os << __x.m() << __sp << __x.n(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - fisher_f_distribution<_RT>& __x) -{ - typedef fisher_f_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __m; - result_type __n; - __is >> __m >> __n; - if (!__is.fail()) - __x.param(param_type(__m, __n)); - return __is; -} - -// student_t_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS student_t_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - result_type __n_; - public: - typedef student_t_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - explicit param_type(result_type __n = 1) : __n_(__n) {} - - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __n_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__n_ == __y.__n_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - }; - -private: - param_type __p_; - normal_distribution<result_type> __nd_; - -public: - // constructor and reset functions -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - student_t_distribution() : student_t_distribution(1) {} - _LIBCPP_INLINE_VISIBILITY - explicit student_t_distribution(result_type __n) - : __p_(param_type(__n)) {} -#else - _LIBCPP_INLINE_VISIBILITY - explicit student_t_distribution(result_type __n = 1) - : __p_(param_type(__n)) {} -#endif - _LIBCPP_INLINE_VISIBILITY - explicit student_t_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {__nd_.reset();} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - result_type n() const {return __p_.n();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 
-numeric_limits<result_type>::infinity();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return numeric_limits<result_type>::infinity();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const student_t_distribution& __x, - const student_t_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const student_t_distribution& __x, - const student_t_distribution& __y) - {return !(__x == __y);} -}; - -template <class _RealType> -template<class _URNG> -_RealType -student_t_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - gamma_distribution<result_type> __gd(__p.n() * .5, 2); - return __nd_(__g) * _VSTD::sqrt(__p.n()/__gd(__g)); -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const student_t_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - __os << __x.n(); - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - student_t_distribution<_RT>& __x) -{ - typedef student_t_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - typedef typename _Eng::param_type param_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - result_type __n; - __is >> __n; - if (!__is.fail()) - __x.param(param_type(__n)); - return __is; -} - -// discrete_distribution - -template<class _IntType = int> -class _LIBCPP_TEMPLATE_VIS discrete_distribution -{ -public: - // types - typedef _IntType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - vector<double> __p_; - public: - typedef discrete_distribution distribution_type; - - _LIBCPP_INLINE_VISIBILITY - param_type() {} - template<class _InputIterator> - _LIBCPP_INLINE_VISIBILITY - param_type(_InputIterator __f, _InputIterator __l) - : __p_(__f, __l) {__init();} -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY - param_type(initializer_list<double> __wl) - : __p_(__wl.begin(), __wl.end()) {__init();} -#endif // _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - param_type(size_t __nw, double __xmin, double __xmax, - _UnaryOperation __fw); - - vector<double> probabilities() const; - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - - private: - void __init(); - - friend class discrete_distribution; - - template <class _CharT, class _Traits, class _IT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const discrete_distribution<_IT>& __x); - - template <class _CharT, class _Traits, class _IT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - discrete_distribution<_IT>& __x); - }; - -private: - param_type __p_; - -public: - // constructor and reset functions - _LIBCPP_INLINE_VISIBILITY - discrete_distribution() {} - template<class _InputIterator> - _LIBCPP_INLINE_VISIBILITY - discrete_distribution(_InputIterator __f, _InputIterator __l) - : __p_(__f, __l) {} -#ifndef _LIBCPP_CXX03_LANG - 
_LIBCPP_INLINE_VISIBILITY - discrete_distribution(initializer_list<double> __wl) - : __p_(__wl) {} -#endif // _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - _LIBCPP_INLINE_VISIBILITY - discrete_distribution(size_t __nw, double __xmin, double __xmax, - _UnaryOperation __fw) - : __p_(__nw, __xmin, __xmax, __fw) {} - _LIBCPP_INLINE_VISIBILITY - explicit discrete_distribution(const param_type& __p) - : __p_(__p) {} - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - vector<double> probabilities() const {return __p_.probabilities();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return 0;} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return __p_.__p_.size();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const discrete_distribution& __x, - const discrete_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const discrete_distribution& __x, - const discrete_distribution& __y) - {return !(__x == __y);} - - template <class _CharT, class _Traits, class _IT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const discrete_distribution<_IT>& __x); - - template <class _CharT, class _Traits, class _IT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - discrete_distribution<_IT>& __x); -}; - -template<class _IntType> -template<class _UnaryOperation> -discrete_distribution<_IntType>::param_type::param_type(size_t __nw, - double __xmin, - double __xmax, - _UnaryOperation __fw) -{ - if (__nw > 1) - { - __p_.reserve(__nw - 1); - double __d = (__xmax - __xmin) / __nw; - double __d2 = __d / 2; - for (size_t __k = 0; __k < __nw; ++__k) - __p_.push_back(__fw(__xmin + __k * __d + __d2)); - __init(); - } -} - -template<class _IntType> -void -discrete_distribution<_IntType>::param_type::__init() -{ - if (!__p_.empty()) - { - if (__p_.size() > 1) - { - double __s = _VSTD::accumulate(__p_.begin(), __p_.end(), 0.0); - for (vector<double>::iterator __i = __p_.begin(), __e = __p_.end(); __i < __e; ++__i) - *__i /= __s; - vector<double> __t(__p_.size() - 1); - _VSTD::partial_sum(__p_.begin(), __p_.end() - 1, __t.begin()); - swap(__p_, __t); - } - else - { - __p_.clear(); - __p_.shrink_to_fit(); - } - } -} - -template<class _IntType> -vector<double> -discrete_distribution<_IntType>::param_type::probabilities() const -{ - size_t __n = __p_.size(); - vector<double> __p(__n+1); - _VSTD::adjacent_difference(__p_.begin(), __p_.end(), __p.begin()); - if (__n > 0) - __p[__n] = 1 - __p_[__n-1]; - else - __p[0] = 1; - return __p; -} - -template<class _IntType> -template<class _URNG> -_IntType -discrete_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p) -{ - uniform_real_distribution<double> __gen; - return static_cast<_IntType>( - _VSTD::upper_bound(__p.__p_.begin(), __p.__p_.end(), __gen(__g)) - - __p.__p_.begin()); -} - -template <class _CharT, class _Traits, class _IT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const discrete_distribution<_IT>& __x) -{ - 
__save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - size_t __n = __x.__p_.__p_.size(); - __os << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__p_[__i]; - return __os; -} - -template <class _CharT, class _Traits, class _IT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - discrete_distribution<_IT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - size_t __n; - __is >> __n; - vector<double> __p(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __p[__i]; - if (!__is.fail()) - swap(__x.__p_.__p_, __p); - return __is; -} - -// piecewise_constant_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS piecewise_constant_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - vector<result_type> __b_; - vector<result_type> __densities_; - vector<result_type> __areas_; - public: - typedef piecewise_constant_distribution distribution_type; - - param_type(); - template<class _InputIteratorB, class _InputIteratorW> - param_type(_InputIteratorB __fB, _InputIteratorB __lB, - _InputIteratorW __fW); -#ifndef _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); -#endif // _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - param_type(size_t __nw, result_type __xmin, result_type __xmax, - _UnaryOperation __fw); - param_type(param_type const&) = default; - param_type & operator=(const param_type& __rhs); - - _LIBCPP_INLINE_VISIBILITY - vector<result_type> intervals() const {return __b_;} - _LIBCPP_INLINE_VISIBILITY - vector<result_type> densities() const {return __densities_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - - private: - void __init(); - - friend class piecewise_constant_distribution; - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_constant_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_constant_distribution<_RT>& __x); - }; - -private: - param_type __p_; - -public: - // constructor and reset functions - _LIBCPP_INLINE_VISIBILITY - piecewise_constant_distribution() {} - template<class _InputIteratorB, class _InputIteratorW> - _LIBCPP_INLINE_VISIBILITY - piecewise_constant_distribution(_InputIteratorB __fB, - _InputIteratorB __lB, - _InputIteratorW __fW) - : __p_(__fB, __lB, __fW) {} - -#ifndef _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - _LIBCPP_INLINE_VISIBILITY - piecewise_constant_distribution(initializer_list<result_type> __bl, - _UnaryOperation __fw) - : __p_(__bl, __fw) {} -#endif // _LIBCPP_CXX03_LANG - - template<class _UnaryOperation> - _LIBCPP_INLINE_VISIBILITY - piecewise_constant_distribution(size_t __nw, result_type __xmin, - result_type __xmax, 
_UnaryOperation __fw) - : __p_(__nw, __xmin, __xmax, __fw) {} - - _LIBCPP_INLINE_VISIBILITY - explicit piecewise_constant_distribution(const param_type& __p) - : __p_(__p) {} - - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - vector<result_type> intervals() const {return __p_.intervals();} - _LIBCPP_INLINE_VISIBILITY - vector<result_type> densities() const {return __p_.densities();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - _LIBCPP_INLINE_VISIBILITY - result_type min() const {return __p_.__b_.front();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return __p_.__b_.back();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const piecewise_constant_distribution& __x, - const piecewise_constant_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const piecewise_constant_distribution& __x, - const piecewise_constant_distribution& __y) - {return !(__x == __y);} - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_constant_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_constant_distribution<_RT>& __x); -}; - -template<class _RealType> -typename piecewise_constant_distribution<_RealType>::param_type & -piecewise_constant_distribution<_RealType>::param_type::operator= - (const param_type& __rhs) -{ -// These can throw - __b_.reserve (__rhs.__b_.size ()); - __densities_.reserve(__rhs.__densities_.size()); - __areas_.reserve (__rhs.__areas_.size()); - -// These can not throw - __b_ = __rhs.__b_; - __densities_ = __rhs.__densities_; - __areas_ = __rhs.__areas_; - return *this; -} - -template<class _RealType> -void -piecewise_constant_distribution<_RealType>::param_type::__init() -{ - // __densities_ contains non-normalized areas - result_type __total_area = _VSTD::accumulate(__densities_.begin(), - __densities_.end(), - result_type()); - for (size_t __i = 0; __i < __densities_.size(); ++__i) - __densities_[__i] /= __total_area; - // __densities_ contains normalized areas - __areas_.assign(__densities_.size(), result_type()); - _VSTD::partial_sum(__densities_.begin(), __densities_.end() - 1, - __areas_.begin() + 1); - // __areas_ contains partial sums of normalized areas: [0, __densities_ - 1] - __densities_.back() = 1 - __areas_.back(); // correct round off error - for (size_t __i = 0; __i < __densities_.size(); ++__i) - __densities_[__i] /= (__b_[__i+1] - __b_[__i]); - // __densities_ now contains __densities_ -} - -template<class _RealType> -piecewise_constant_distribution<_RealType>::param_type::param_type() - : __b_(2), - __densities_(1, 1.0), - __areas_(1, 0.0) -{ - __b_[1] = 1; -} - -template<class _RealType> -template<class _InputIteratorB, class _InputIteratorW> -piecewise_constant_distribution<_RealType>::param_type::param_type( - _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) - : __b_(__fB, __lB) -{ - if (__b_.size() < 2) - { - __b_.resize(2); - __b_[0] = 0; - __b_[1] = 
1; - __densities_.assign(1, 1.0); - __areas_.assign(1, 0.0); - } - else - { - __densities_.reserve(__b_.size() - 1); - for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__fW) - __densities_.push_back(*__fW); - __init(); - } -} - -#ifndef _LIBCPP_CXX03_LANG - -template<class _RealType> -template<class _UnaryOperation> -piecewise_constant_distribution<_RealType>::param_type::param_type( - initializer_list<result_type> __bl, _UnaryOperation __fw) - : __b_(__bl.begin(), __bl.end()) -{ - if (__b_.size() < 2) - { - __b_.resize(2); - __b_[0] = 0; - __b_[1] = 1; - __densities_.assign(1, 1.0); - __areas_.assign(1, 0.0); - } - else - { - __densities_.reserve(__b_.size() - 1); - for (size_t __i = 0; __i < __b_.size() - 1; ++__i) - __densities_.push_back(__fw((__b_[__i+1] + __b_[__i])*.5)); - __init(); - } -} - -#endif // _LIBCPP_CXX03_LANG - -template<class _RealType> -template<class _UnaryOperation> -piecewise_constant_distribution<_RealType>::param_type::param_type( - size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw) - : __b_(__nw == 0 ? 2 : __nw + 1) -{ - size_t __n = __b_.size() - 1; - result_type __d = (__xmax - __xmin) / __n; - __densities_.reserve(__n); - for (size_t __i = 0; __i < __n; ++__i) - { - __b_[__i] = __xmin + __i * __d; - __densities_.push_back(__fw(__b_[__i] + __d*.5)); - } - __b_[__n] = __xmax; - __init(); -} - -template<class _RealType> -template<class _URNG> -_RealType -piecewise_constant_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - typedef uniform_real_distribution<result_type> _Gen; - result_type __u = _Gen()(__g); - ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(), - __u) - __p.__areas_.begin() - 1; - return (__u - __p.__areas_[__k]) / __p.__densities_[__k] + __p.__b_[__k]; -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_constant_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - size_t __n = __x.__p_.__b_.size(); - __os << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__b_[__i]; - __n = __x.__p_.__densities_.size(); - __os << __sp << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__densities_[__i]; - __n = __x.__p_.__areas_.size(); - __os << __sp << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__areas_[__i]; - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_constant_distribution<_RT>& __x) -{ - typedef piecewise_constant_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - size_t __n; - __is >> __n; - vector<result_type> __b(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __b[__i]; - __is >> __n; - vector<result_type> __densities(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __densities[__i]; - __is >> __n; - vector<result_type> __areas(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __areas[__i]; - if (!__is.fail()) - { - swap(__x.__p_.__b_, __b); - swap(__x.__p_.__densities_, 
__densities); - swap(__x.__p_.__areas_, __areas); - } - return __is; -} - -// piecewise_linear_distribution - -template<class _RealType = double> -class _LIBCPP_TEMPLATE_VIS piecewise_linear_distribution -{ -public: - // types - typedef _RealType result_type; - - class _LIBCPP_TEMPLATE_VIS param_type - { - vector<result_type> __b_; - vector<result_type> __densities_; - vector<result_type> __areas_; - public: - typedef piecewise_linear_distribution distribution_type; - - param_type(); - template<class _InputIteratorB, class _InputIteratorW> - param_type(_InputIteratorB __fB, _InputIteratorB __lB, - _InputIteratorW __fW); -#ifndef _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - param_type(initializer_list<result_type> __bl, _UnaryOperation __fw); -#endif // _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - param_type(size_t __nw, result_type __xmin, result_type __xmax, - _UnaryOperation __fw); - param_type(param_type const&) = default; - param_type & operator=(const param_type& __rhs); - - _LIBCPP_INLINE_VISIBILITY - vector<result_type> intervals() const {return __b_;} - _LIBCPP_INLINE_VISIBILITY - vector<result_type> densities() const {return __densities_;} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const param_type& __x, const param_type& __y) - {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const param_type& __x, const param_type& __y) - {return !(__x == __y);} - - private: - void __init(); - - friend class piecewise_linear_distribution; - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_linear_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_linear_distribution<_RT>& __x); - }; - -private: - param_type __p_; - -public: - // constructor and reset functions - _LIBCPP_INLINE_VISIBILITY - piecewise_linear_distribution() {} - template<class _InputIteratorB, class _InputIteratorW> - _LIBCPP_INLINE_VISIBILITY - piecewise_linear_distribution(_InputIteratorB __fB, - _InputIteratorB __lB, - _InputIteratorW __fW) - : __p_(__fB, __lB, __fW) {} - -#ifndef _LIBCPP_CXX03_LANG - template<class _UnaryOperation> - _LIBCPP_INLINE_VISIBILITY - piecewise_linear_distribution(initializer_list<result_type> __bl, - _UnaryOperation __fw) - : __p_(__bl, __fw) {} -#endif // _LIBCPP_CXX03_LANG - - template<class _UnaryOperation> - _LIBCPP_INLINE_VISIBILITY - piecewise_linear_distribution(size_t __nw, result_type __xmin, - result_type __xmax, _UnaryOperation __fw) - : __p_(__nw, __xmin, __xmax, __fw) {} - - _LIBCPP_INLINE_VISIBILITY - explicit piecewise_linear_distribution(const param_type& __p) - : __p_(__p) {} - - _LIBCPP_INLINE_VISIBILITY - void reset() {} - - // generating functions - template<class _URNG> - _LIBCPP_INLINE_VISIBILITY - result_type operator()(_URNG& __g) - {return (*this)(__g, __p_);} - template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p); - - // property functions - _LIBCPP_INLINE_VISIBILITY - vector<result_type> intervals() const {return __p_.intervals();} - _LIBCPP_INLINE_VISIBILITY - vector<result_type> densities() const {return __p_.densities();} - - _LIBCPP_INLINE_VISIBILITY - param_type param() const {return __p_;} - _LIBCPP_INLINE_VISIBILITY - void param(const param_type& __p) {__p_ = __p;} - - 
_LIBCPP_INLINE_VISIBILITY - result_type min() const {return __p_.__b_.front();} - _LIBCPP_INLINE_VISIBILITY - result_type max() const {return __p_.__b_.back();} - - friend _LIBCPP_INLINE_VISIBILITY - bool operator==(const piecewise_linear_distribution& __x, - const piecewise_linear_distribution& __y) - {return __x.__p_ == __y.__p_;} - friend _LIBCPP_INLINE_VISIBILITY - bool operator!=(const piecewise_linear_distribution& __x, - const piecewise_linear_distribution& __y) - {return !(__x == __y);} - - template <class _CharT, class _Traits, class _RT> - friend - basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_linear_distribution<_RT>& __x); - - template <class _CharT, class _Traits, class _RT> - friend - basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_linear_distribution<_RT>& __x); -}; - -template<class _RealType> -typename piecewise_linear_distribution<_RealType>::param_type & -piecewise_linear_distribution<_RealType>::param_type::operator= - (const param_type& __rhs) -{ -// These can throw - __b_.reserve (__rhs.__b_.size ()); - __densities_.reserve(__rhs.__densities_.size()); - __areas_.reserve (__rhs.__areas_.size()); - -// These can not throw - __b_ = __rhs.__b_; - __densities_ = __rhs.__densities_; - __areas_ = __rhs.__areas_; - return *this; -} - - -template<class _RealType> -void -piecewise_linear_distribution<_RealType>::param_type::__init() -{ - __areas_.assign(__densities_.size() - 1, result_type()); - result_type _Sp = 0; - for (size_t __i = 0; __i < __areas_.size(); ++__i) - { - __areas_[__i] = (__densities_[__i+1] + __densities_[__i]) * - (__b_[__i+1] - __b_[__i]) * .5; - _Sp += __areas_[__i]; - } - for (size_t __i = __areas_.size(); __i > 1;) - { - --__i; - __areas_[__i] = __areas_[__i-1] / _Sp; - } - __areas_[0] = 0; - for (size_t __i = 1; __i < __areas_.size(); ++__i) - __areas_[__i] += __areas_[__i-1]; - for (size_t __i = 0; __i < __densities_.size(); ++__i) - __densities_[__i] /= _Sp; -} - -template<class _RealType> -piecewise_linear_distribution<_RealType>::param_type::param_type() - : __b_(2), - __densities_(2, 1.0), - __areas_(1, 0.0) -{ - __b_[1] = 1; -} - -template<class _RealType> -template<class _InputIteratorB, class _InputIteratorW> -piecewise_linear_distribution<_RealType>::param_type::param_type( - _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW) - : __b_(__fB, __lB) -{ - if (__b_.size() < 2) - { - __b_.resize(2); - __b_[0] = 0; - __b_[1] = 1; - __densities_.assign(2, 1.0); - __areas_.assign(1, 0.0); - } - else - { - __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __b_.size(); ++__i, ++__fW) - __densities_.push_back(*__fW); - __init(); - } -} - -#ifndef _LIBCPP_CXX03_LANG - -template<class _RealType> -template<class _UnaryOperation> -piecewise_linear_distribution<_RealType>::param_type::param_type( - initializer_list<result_type> __bl, _UnaryOperation __fw) - : __b_(__bl.begin(), __bl.end()) -{ - if (__b_.size() < 2) - { - __b_.resize(2); - __b_[0] = 0; - __b_[1] = 1; - __densities_.assign(2, 1.0); - __areas_.assign(1, 0.0); - } - else - { - __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __b_.size(); ++__i) - __densities_.push_back(__fw(__b_[__i])); - __init(); - } -} - -#endif // _LIBCPP_CXX03_LANG - -template<class _RealType> -template<class _UnaryOperation> -piecewise_linear_distribution<_RealType>::param_type::param_type( - size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw) - : __b_(__nw 
== 0 ? 2 : __nw + 1) -{ - size_t __n = __b_.size() - 1; - result_type __d = (__xmax - __xmin) / __n; - __densities_.reserve(__b_.size()); - for (size_t __i = 0; __i < __n; ++__i) - { - __b_[__i] = __xmin + __i * __d; - __densities_.push_back(__fw(__b_[__i])); - } - __b_[__n] = __xmax; - __densities_.push_back(__fw(__b_[__n])); - __init(); -} - -template<class _RealType> -template<class _URNG> -_RealType -piecewise_linear_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p) -{ - typedef uniform_real_distribution<result_type> _Gen; - result_type __u = _Gen()(__g); - ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(), - __u) - __p.__areas_.begin() - 1; - __u -= __p.__areas_[__k]; - const result_type __dk = __p.__densities_[__k]; - const result_type __dk1 = __p.__densities_[__k+1]; - const result_type __deltad = __dk1 - __dk; - const result_type __bk = __p.__b_[__k]; - if (__deltad == 0) - return __u / __dk + __bk; - const result_type __bk1 = __p.__b_[__k+1]; - const result_type __deltab = __bk1 - __bk; - return (__bk * __dk1 - __bk1 * __dk + - _VSTD::sqrt(__deltab * (__deltab * __dk * __dk + 2 * __deltad * __u))) / - __deltad; -} - -template <class _CharT, class _Traits, class _RT> -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, - const piecewise_linear_distribution<_RT>& __x) -{ - __save_flags<_CharT, _Traits> __lx(__os); - typedef basic_ostream<_CharT, _Traits> _OStream; - __os.flags(_OStream::dec | _OStream::left | _OStream::fixed | - _OStream::scientific); - _CharT __sp = __os.widen(' '); - __os.fill(__sp); - size_t __n = __x.__p_.__b_.size(); - __os << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__b_[__i]; - __n = __x.__p_.__densities_.size(); - __os << __sp << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__densities_[__i]; - __n = __x.__p_.__areas_.size(); - __os << __sp << __n; - for (size_t __i = 0; __i < __n; ++__i) - __os << __sp << __x.__p_.__areas_[__i]; - return __os; -} - -template <class _CharT, class _Traits, class _RT> -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, - piecewise_linear_distribution<_RT>& __x) -{ - typedef piecewise_linear_distribution<_RT> _Eng; - typedef typename _Eng::result_type result_type; - __save_flags<_CharT, _Traits> __lx(__is); - typedef basic_istream<_CharT, _Traits> _Istream; - __is.flags(_Istream::dec | _Istream::skipws); - size_t __n; - __is >> __n; - vector<result_type> __b(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __b[__i]; - __is >> __n; - vector<result_type> __densities(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __densities[__i]; - __is >> __n; - vector<result_type> __areas(__n); - for (size_t __i = 0; __i < __n; ++__i) - __is >> __areas[__i]; - if (!__is.fail()) - { - swap(__x.__p_.__b_, __b); - swap(__x.__p_.__densities_, __densities); - swap(__x.__p_.__areas_, __areas); - } - return __is; -} - -_LIBCPP_END_NAMESPACE_STD - -_LIBCPP_POP_MACROS - #endif // _LIBCPP_RANDOM diff --git a/libcxx/include/ranges b/libcxx/include/ranges index 8a99ee64cfc9..dd7decf66fa8 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -36,7 +36,7 @@ namespace std::ranges { inline constexpr bool enable_borrowed_range = false; template<class T> - using iterator_t = decltype(ranges::begin(declval<R&>())); + using iterator_t = decltype(ranges::begin(declval<T&>())); template<range R> using sentinel_t = decltype(ranges::end(declval<R&>())); template<range R> 
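The <random> hunk above carries the full piecewise_linear_distribution machinery: param_type::__init() turns the boundary/density pairs into normalized trapezoid areas, and operator() inverts that piecewise-linear CDF. A minimal usage sketch of the standard-library interface (not part of the import; the boundary and weight values are made up for illustration):

#include <iostream>
#include <random>
#include <vector>

int main() {
  // Interval boundaries (the __b_ vector) and densities at those boundaries.
  std::vector<double> b{0.0, 1.0, 3.0};
  std::vector<double> w{1.0, 2.0, 0.5};
  std::piecewise_linear_distribution<double> dist(b.begin(), b.end(), w.begin());
  std::mt19937 gen(42);
  for (int i = 0; i < 3; ++i)
    std::cout << dist(gen) << '\n'; // samples drawn from the piecewise-linear density
}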
diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 0ad7dcce9848..a5f85e88b502 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -87,6 +87,8 @@ namespace std { constexpr basic_string_view(const charT* str, size_type len); template <class It, class End> constexpr basic_string_view(It begin, End end); // C++20 + template <class Range> + constexpr basic_string_view(Range&& r); // C++23 // 7.4, basic_string_view iterator support constexpr const_iterator begin() const noexcept; @@ -171,6 +173,8 @@ namespace std { // basic_string_view deduction guides template<class It, class End> basic_string_view(It, End) -> basic_string_view<iter_value_t<It>>; // C++20 + template<class Range> + basic_string_view(Range&&) -> basic_string_view<ranges::range_value_t<Range>>; // C++23 // 7.11, Hash support template <class T> struct hash; @@ -191,12 +195,13 @@ namespace std { */ -#include <__concepts/convertible_to.h> -#include <__concepts/same_as.h> #include <__config> #include <__debug> +#include <__ranges/concepts.h> +#include <__ranges/data.h> #include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> +#include <__ranges/size.h> #include <__string> #include <algorithm> #include <compare> @@ -204,6 +209,7 @@ namespace std { #include <iterator> #include <limits> #include <stdexcept> +#include <type_traits> #include <version> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -282,7 +288,7 @@ public: #if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES) template <contiguous_iterator _It, sized_sentinel_for<_It> _End> - requires (same_as<iter_value_t<_It>, _CharT> && !convertible_to<_End, size_type>) + requires (is_same_v<iter_value_t<_It>, _CharT> && !is_convertible_v<_End, size_type>) constexpr _LIBCPP_HIDE_FROM_ABI basic_string_view(_It __begin, _End __end) : __data(_VSTD::to_address(__begin)), __size(__end - __begin) { @@ -290,6 +296,25 @@ public: } #endif +#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES) + template <class _Range> + requires ( + !is_same_v<remove_cvref_t<_Range>, basic_string_view> && + ranges::contiguous_range<_Range> && + ranges::sized_range<_Range> && + is_same_v<ranges::range_value_t<_Range>, _CharT> && + !is_convertible_v<_Range, const _CharT*> && + (!requires(remove_cvref_t<_Range>& d) { + d.operator _VSTD::basic_string_view<_CharT, _Traits>(); + }) && + (!requires { + typename remove_reference_t<_Range>::traits_type; + } || is_same_v<typename remove_reference_t<_Range>::traits_type, _Traits>) + ) + constexpr _LIBCPP_HIDE_FROM_ABI + basic_string_view(_Range&& __r) : __data(ranges::data(__r)), __size(ranges::size(__r)) {} +#endif + _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY basic_string_view(const _CharT* __s) : __data(__s), __size(_VSTD::__char_traits_length_checked<_Traits>(__s)) {} @@ -697,6 +722,12 @@ template <contiguous_iterator _It, sized_sentinel_for<_It> _End> basic_string_view(_It, _End) -> basic_string_view<iter_value_t<_It>>; #endif + +#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES) +template <ranges::contiguous_range _Range> + basic_string_view(_Range) -> basic_string_view<ranges::range_value_t<_Range>>; +#endif + // [string.view.comparison] // operator == template<class _CharT, class _Traits> @@ -708,7 +739,9 @@ bool operator==(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) == 0; } -template<class _CharT, class _Traits> +// The dummy default template parameters are used to work around a MSVC issue with mangling, see VSO-409326 for details. 
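The string_view hunk above adds the C++23 range constructor and its deduction guide. A hedged sketch of what that enables, assuming a C++23 compiler and library:

#include <string_view>
#include <vector>

int main() {
  std::vector<char> chars{'l', 'l', 'v', 'm'};
  // C++23: construct directly from a contiguous sized range of char...
  std::string_view sv(chars);
  // ...and the new deduction guide picks the character type from the range.
  std::basic_string_view deduced(chars); // deduces basic_string_view<char>
  return sv.size() == deduced.size() ? 0 : 1;
}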
+// This applies to the other sufficient overloads below for the other comparison operators. +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator==(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT @@ -717,7 +750,7 @@ bool operator==(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) == 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator==(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT @@ -737,7 +770,7 @@ bool operator!=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha return __lhs.compare(__rhs) != 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator!=(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT @@ -747,7 +780,7 @@ bool operator!=(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) != 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator!=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT @@ -766,7 +799,7 @@ bool operator<(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Char return __lhs.compare(__rhs) < 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator<(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT @@ -774,7 +807,7 @@ bool operator<(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) < 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator<(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT @@ -791,7 +824,7 @@ bool operator> (basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha return __lhs.compare(__rhs) > 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator>(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT @@ -799,7 +832,7 @@ bool operator>(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) > 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator>(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT @@ -816,7 +849,7 @@ bool operator<=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha return __lhs.compare(__rhs) <= 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator<=(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, 
_Traits> >::type __rhs) _NOEXCEPT @@ -824,7 +857,7 @@ bool operator<=(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) <= 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator<=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT @@ -842,7 +875,7 @@ bool operator>=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 1> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator>=(basic_string_view<_CharT, _Traits> __lhs, typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT @@ -850,7 +883,7 @@ bool operator>=(basic_string_view<_CharT, _Traits> __lhs, return __lhs.compare(__rhs) >= 0; } -template<class _CharT, class _Traits> +template<class _CharT, class _Traits, int = 2> _LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY bool operator>=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs, basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits index e9d5e06f36dc..bfb6fcb05134 100644 --- a/libcxx/include/type_traits +++ b/libcxx/include/type_traits @@ -1416,9 +1416,7 @@ template<class _Tp> using type_identity_t = typename type_identity<_Tp>::type; // is_signed -// Before Clang 10, __is_signed didn't work for floating-point types or enums. -#if __has_keyword(__is_signed) && \ - !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1000) +#if __has_keyword(__is_signed) template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { }; diff --git a/libcxx/include/utility b/libcxx/include/utility index 2b3c4dfa3f0e..4fa90289a412 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -227,6 +227,7 @@ template <class T> #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/piecewise_construct.h> +#include <__utility/priority_tag.h> #include <__utility/rel_ops.h> #include <__utility/swap.h> #include <__utility/to_underlying.h> diff --git a/libcxx/include/vector b/libcxx/include/vector index e41afbaca509..9b0092cfdbd9 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -350,23 +350,23 @@ class _LIBCPP_TEMPLATE_VIS vector : private __vector_base<_Tp, _Allocator> { private: - typedef __vector_base<_Tp, _Allocator> __base; - typedef allocator<_Tp> __default_allocator_type; + typedef __vector_base<_Tp, _Allocator> __base; + typedef allocator<_Tp> __default_allocator_type; public: - typedef vector __self; - typedef _Tp value_type; - typedef _Allocator allocator_type; - typedef allocator_traits<allocator_type> __alloc_traits; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef typename __allocator_traits<allocator_type>::size_type size_type; - typedef typename __alloc_traits::difference_type difference_type; - typedef typename __alloc_traits::pointer pointer; - typedef typename __alloc_traits::const_pointer const_pointer; - typedef __wrap_iter<pointer> iterator; - typedef __wrap_iter<const_pointer> const_iterator; - typedef _VSTD::reverse_iterator<iterator> reverse_iterator; - typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator; + typedef vector __self; + typedef _Tp value_type; + typedef _Allocator allocator_type; + typedef allocator_traits<allocator_type> 
__alloc_traits; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename __alloc_traits::size_type size_type; + typedef typename __alloc_traits::difference_type difference_type; + typedef typename __alloc_traits::pointer pointer; + typedef typename __alloc_traits::const_pointer const_pointer; + typedef __wrap_iter<pointer> iterator; + typedef __wrap_iter<const_pointer> const_iterator; + typedef _VSTD::reverse_iterator<iterator> reverse_iterator; + typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator; static_assert((is_same<typename allocator_type::value_type, value_type>::value), "Allocator::value_type must be same type as value_type"); @@ -395,7 +395,21 @@ public: explicit vector(size_type __n, const allocator_type& __a); #endif vector(size_type __n, const value_type& __x); - vector(size_type __n, const value_type& __x, const allocator_type& __a); + + template <class = __enable_if_t<__is_allocator<_Allocator>::value> > + vector(size_type __n, const value_type& __x, const allocator_type& __a) + : __base(__a) + { +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_c(this); +#endif + if (__n > 0) + { + __vallocate(__n); + __construct_at_end(__n, __x); + } + } + template <class _InputIterator> vector(_InputIterator __first, typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && @@ -1127,20 +1141,6 @@ vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x) } template <class _Tp, class _Allocator> -vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a) - : __base(__a) -{ -#if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__insert_c(this); -#endif - if (__n > 0) - { - __vallocate(__n); - __construct_at_end(__n, __x); - } -} - -template <class _Tp, class _Allocator> template <class _InputIterator> vector<_Tp, _Allocator>::vector(_InputIterator __first, typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value && diff --git a/libcxx/include/version b/libcxx/include/version index 7c16ac85e430..9322c3b8c05d 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -41,6 +41,7 @@ __cpp_lib_bool_constant 201505L <type_traits> __cpp_lib_bounded_array_traits 201902L <type_traits> __cpp_lib_boyer_moore_searcher 201603L <functional> __cpp_lib_byte 201603L <cstddef> +__cpp_lib_byteswap 202110L <bit> __cpp_lib_char8_t 201811L <atomic> <filesystem> <istream> <limits> <locale> <ostream> <string> <string_view> @@ -72,7 +73,7 @@ __cpp_lib_exchange_function 201304L <utility> __cpp_lib_execution 201902L <execution> 201603L // C++17 __cpp_lib_filesystem 201703L <filesystem> -__cpp_lib_format 201907L <format> +__cpp_lib_format 202106L <format> __cpp_lib_gcd_lcm 201606L <numeric> __cpp_lib_generic_associative_lookup 201304L <map> <set> __cpp_lib_generic_unordered_lookup 201811L <unordered_map> <unordered_set> @@ -300,7 +301,7 @@ __cpp_lib_void_t 201411L <type_traits> # undef __cpp_lib_execution // # define __cpp_lib_execution 201902L # if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format) -// # define __cpp_lib_format 201907L +// # define __cpp_lib_format 202106L # endif # define __cpp_lib_generic_unordered_lookup 201811L # define __cpp_lib_int_pow2 202002L @@ -344,6 +345,7 @@ __cpp_lib_void_t 201411L <type_traits> #endif #if _LIBCPP_STD_VER > 20 +# define __cpp_lib_byteswap 202110L # define __cpp_lib_is_scoped_enum 202011L // # define __cpp_lib_stacktrace 202011L // # define __cpp_lib_stdatomic_h 202011L diff --git a/libcxx/src/filesystem/filesystem_common.h 
b/libcxx/src/filesystem/filesystem_common.h index 70092fe4e24d..a2c340e61083 100644 --- a/libcxx/src/filesystem/filesystem_common.h +++ b/libcxx/src/filesystem/filesystem_common.h @@ -60,7 +60,7 @@ errc __win_err_to_errc(int err); namespace { -static _LIBCPP_FORMAT_PRINTF(1, 0) string +static _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 0) string format_string_impl(const char* msg, va_list ap) { array<char, 256> buf; @@ -84,7 +84,7 @@ format_string_impl(const char* msg, va_list ap) { return result; } -static _LIBCPP_FORMAT_PRINTF(1, 2) string +static _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 2) string format_string(const char* msg, ...) { string ret; va_list ap; @@ -172,7 +172,7 @@ struct ErrorHandler { _LIBCPP_UNREACHABLE(); } - _LIBCPP_FORMAT_PRINTF(3, 0) + _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 0) void report_impl(const error_code& ec, const char* msg, va_list ap) const { if (ec_) { *ec_ = ec; @@ -191,7 +191,7 @@ struct ErrorHandler { _LIBCPP_UNREACHABLE(); } - _LIBCPP_FORMAT_PRINTF(3, 4) + _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) T report(const error_code& ec, const char* msg, ...) const { va_list ap; va_start(ap, msg); @@ -213,7 +213,7 @@ struct ErrorHandler { return report(make_error_code(err)); } - _LIBCPP_FORMAT_PRINTF(3, 4) + _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) T report(errc const& err, const char* msg, ...) const { va_list ap; va_start(ap, msg); diff --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp index d3577c9f7cf8..5959d2a25fea 100644 --- a/libunwind/src/Unwind-EHABI.cpp +++ b/libunwind/src/Unwind-EHABI.cpp @@ -187,9 +187,14 @@ static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, if (result != _URC_CONTINUE_UNWIND) return result; - if (__unw_step(reinterpret_cast<unw_cursor_t *>(context)) != UNW_STEP_SUCCESS) + switch (__unw_step(reinterpret_cast<unw_cursor_t *>(context))) { + case UNW_STEP_SUCCESS: + return _URC_CONTINUE_UNWIND; + case UNW_STEP_END: + return _URC_END_OF_STACK; + default: return _URC_FAILURE; - return _URC_CONTINUE_UNWIND; + } } // Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / @@ -678,12 +683,13 @@ static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, _Unwind_Stop_Fn stop, void *stop_parameter) { + bool endOfStack = false; // See comment at the start of unwind_phase1 regarding VRS integrity. __unw_init_local(cursor, uc); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_force(ex_ojb=%p)", static_cast<void *>(exception_object)); // Walk each frame until we reach where search phase said to stop - while (true) { + while (!endOfStack) { // Update info about this frame. unw_proc_info_t frameInfo; if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { @@ -756,6 +762,14 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, // We may get control back if landing pad calls _Unwind_Resume(). __unw_resume(cursor); break; + case _URC_END_OF_STACK: + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " + "personality returned " + "_URC_END_OF_STACK", + (void *)exception_object); + // Personalty routine did the step and it can't step forward. + endOfStack = true; + break; default: // Personality routine returned an unknown result code. 
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " @@ -1133,9 +1147,14 @@ extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception *exception_object, struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; - if (__unw_step(cursor) != UNW_STEP_SUCCESS) + switch (__unw_step(cursor)) { + case UNW_STEP_SUCCESS: + return _URC_OK; + case UNW_STEP_END: + return _URC_END_OF_STACK; + default: return _URC_FAILURE; - return _URC_OK; + } } #endif // defined(_LIBUNWIND_ARM_EHABI) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 9f6dbd172509..0bff11f450d1 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -214,7 +214,8 @@ void SectionChunk::applyRelARM(uint8_t *off, uint16_t type, OutputSection *os, // the page offset from the current instruction to the target. void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift) { uint32_t orig = read32le(off); - uint64_t imm = ((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC); + int64_t imm = + SignExtend64<21>(((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC)); s += imm; imm = (s >> shift) - (p >> shift); uint32_t immLo = (imm & 0x3) << 29; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 600d14034dea..0788f3519f4e 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1211,6 +1211,12 @@ void Writer::createSymbolAndStringTable() { if (!d || d->writtenToSymtab) continue; d->writtenToSymtab = true; + if (auto *dc = dyn_cast_or_null<DefinedCOFF>(d)) { + COFFSymbolRef symRef = dc->getCOFFSymbol(); + if (symRef.isSectionDefinition() || + symRef.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL) + continue; + } if (Optional<coff_symbol16> sym = createSymbol(d)) outputSymtab.push_back(*sym); diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index b9fd4cdbad69..741ff26a7e6c 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -630,8 +630,8 @@ bool AArch64Err843419Patcher::createFixes() { for (OutputSection *os : outputSections) { if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) continue; - for (BaseCommand *bc : os->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(bc)) { + for (SectionCommand *cmd : os->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) { std::vector<Patch843419Section *> patches = patchInputSectionDescription(*isd); if (!patches.empty()) { diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp index 77623780ffa5..fe6ec09bd979 100644 --- a/lld/ELF/ARMErrataFix.cpp +++ b/lld/ELF/ARMErrataFix.cpp @@ -525,8 +525,8 @@ bool ARMErr657417Patcher::createFixes() { for (OutputSection *os : outputSections) { if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) continue; - for (BaseCommand *bc : os->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(bc)) { + for (SectionCommand *cmd : os->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) { std::vector<Patch657417Section *> patches = patchInputSectionDescription(*isd); if (!patches.empty()) { diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index f2e4a2a14ad6..b7c2eb74757c 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -140,7 +140,16 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_MOVT_PREL: return R_PC; case R_ARM_ALU_PC_G0: + case R_ARM_ALU_PC_G0_NC: + case R_ARM_ALU_PC_G1: + case R_ARM_ALU_PC_G1_NC: + case R_ARM_ALU_PC_G2: case R_ARM_LDR_PC_G0: + case R_ARM_LDR_PC_G1: + case R_ARM_LDR_PC_G2: + 
case R_ARM_LDRS_PC_G0: + case R_ARM_LDRS_PC_G1: + case R_ARM_LDRS_PC_G2: case R_ARM_THM_ALU_PREL_11_0: case R_ARM_THM_PC8: case R_ARM_THM_PC12: @@ -411,56 +420,83 @@ static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { } } -// Utility functions taken from ARMAddressingModes.h, only changes are LLD -// coding style. - // Rotate a 32-bit unsigned value right by a specified amt of bits. static uint32_t rotr32(uint32_t val, uint32_t amt) { assert(amt < 32 && "Invalid rotate amount"); return (val >> amt) | (val << ((32 - amt) & 31)); } -// Rotate a 32-bit unsigned value left by a specified amt of bits. -static uint32_t rotl32(uint32_t val, uint32_t amt) { - assert(amt < 32 && "Invalid rotate amount"); - return (val << amt) | (val >> ((32 - amt) & 31)); +static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group, + uint32_t val) { + uint32_t rem, lz; + do { + lz = llvm::countLeadingZeros(val) & ~1; + rem = val; + if (lz == 32) // implies rem == 0 + break; + val &= 0xffffff >> lz; + } while (group--); + return {rem, lz}; } -// Try to encode a 32-bit unsigned immediate imm with an immediate shifter -// operand, this form is an 8-bit immediate rotated right by an even number of -// bits. We compute the rotate amount to use. If this immediate value cannot be -// handled with a single shifter-op, determine a good rotate amount that will -// take a maximal chunk of bits out of the immediate. -static uint32_t getSOImmValRotate(uint32_t imm) { - // 8-bit (or less) immediates are trivially shifter_operands with a rotate - // of zero. - if ((imm & ~255U) == 0) - return 0; - - // Use CTZ to compute the rotate amount. - unsigned tz = llvm::countTrailingZeros(imm); - - // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, - // not 9. - unsigned rotAmt = tz & ~1; - - // If we can handle this spread, return it. - if ((rotr32(imm, rotAmt) & ~255U) == 0) - return (32 - rotAmt) & 31; // HW rotates right, not left. +static void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, + int group, bool check) { + // ADD/SUB (immediate) add = bit23, sub = bit22 + // immediate field carries is a 12-bit modified immediate, made up of a 4-bit + // even rotate right and an 8-bit immediate. + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x00400000; + val = -val; + } + uint32_t imm, lz; + std::tie(imm, lz) = getRemAndLZForGroup(group, val); + uint32_t rot = 0; + if (lz < 24) { + imm = rotr32(imm, 24 - lz); + rot = (lz + 8) << 7; + } + if (check && imm > 0xff) + error(getErrorLocation(loc) + "unencodeable immediate " + Twine(val).str() + + " for relocation " + toString(rel.type)); + write32le(loc, (read32le(loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); +} - // For values like 0xF000000F, we should ignore the low 6 bits, then - // retry the hunt. - if (imm & 63U) { - unsigned tz2 = countTrailingZeros(imm & ~63U); - unsigned rotAmt2 = tz2 & ~1; - if ((rotr32(imm, rotAmt2) & ~255U) == 0) - return (32 - rotAmt2) & 31; // HW rotates right, not left. +static void encodeLdrGroup(uint8_t *loc, const Relocation &rel, uint64_t val, + int group) { + // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. 
+ if (rel.sym->isFunc()) + val &= ~0x1; + // LDR (literal) u = bit23 + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x0; + val = -val; } + uint32_t imm = getRemAndLZForGroup(group, val).first; + checkUInt(loc, imm, 12, rel); + write32le(loc, (read32le(loc) & 0xff7ff000) | opcode | imm); +} - // Otherwise, we have no way to cover this span of bits with a single - // shifter_op immediate. Return a chunk of bits that will be useful to - // handle. - return (32 - rotAmt) & 31; // HW rotates right, not left. +static void encodeLdrsGroup(uint8_t *loc, const Relocation &rel, uint64_t val, + int group) { + // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. + if (rel.sym->isFunc()) + val &= ~0x1; + // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x0; + val = -val; + } + uint32_t imm = getRemAndLZForGroup(group, val).first; + checkUInt(loc, imm, 8, rel); + write32le(loc, (read32le(loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) | + (imm & 0xf)); } void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { @@ -633,45 +669,39 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { ((val << 4) & 0x7000) | // imm3 (val & 0x00ff)); // imm8 break; - case R_ARM_ALU_PC_G0: { - // ADR (literal) add = bit23, sub = bit22 - // literal is a 12-bit modified immediate, made up of a 4-bit even rotate - // right and an 8-bit immediate. The code-sequence here is derived from - // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we - // want to give an error if we cannot encode the constant. - uint32_t opcode = 0x00800000; - if (val >> 63) { - opcode = 0x00400000; - val = ~val + 1; - } - if ((val & ~255U) != 0) { - uint32_t rotAmt = getSOImmValRotate(val); - // Error if we cannot encode this with a single shift - if (rotr32(~255U, rotAmt) & val) - error(getErrorLocation(loc) + "unencodeable immediate " + - Twine(val).str() + " for relocation " + toString(rel.type)); - val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8); - } - write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val); + case R_ARM_ALU_PC_G0: + encodeAluGroup(loc, rel, val, 0, true); break; - } - case R_ARM_LDR_PC_G0: { - // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a - // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear - // bottom bit to recover S + A - P. 
- if (rel.sym->isFunc()) - val &= ~0x1; - // LDR (literal) u = bit23 - int64_t imm = val; - uint32_t u = 0x00800000; - if (imm < 0) { - imm = -imm; - u = 0; - } - checkUInt(loc, imm, 12, rel); - write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm); + case R_ARM_ALU_PC_G0_NC: + encodeAluGroup(loc, rel, val, 0, false); + break; + case R_ARM_ALU_PC_G1: + encodeAluGroup(loc, rel, val, 1, true); + break; + case R_ARM_ALU_PC_G1_NC: + encodeAluGroup(loc, rel, val, 1, false); + break; + case R_ARM_ALU_PC_G2: + encodeAluGroup(loc, rel, val, 2, true); + break; + case R_ARM_LDR_PC_G0: + encodeLdrGroup(loc, rel, val, 0); + break; + case R_ARM_LDR_PC_G1: + encodeLdrGroup(loc, rel, val, 1); + break; + case R_ARM_LDR_PC_G2: + encodeLdrGroup(loc, rel, val, 2); + break; + case R_ARM_LDRS_PC_G0: + encodeLdrsGroup(loc, rel, val, 0); + break; + case R_ARM_LDRS_PC_G1: + encodeLdrsGroup(loc, rel, val, 1); + break; + case R_ARM_LDRS_PC_G2: + encodeLdrsGroup(loc, rel, val, 2); break; - } case R_ARM_THM_ALU_PREL_11_0: { // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 int64_t imm = val; @@ -816,7 +846,11 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { ((lo & 0x7000) >> 4) | // imm3 (lo & 0x00ff)); // imm8 } - case R_ARM_ALU_PC_G0: { + case R_ARM_ALU_PC_G0: + case R_ARM_ALU_PC_G0_NC: + case R_ARM_ALU_PC_G1: + case R_ARM_ALU_PC_G1_NC: + case R_ARM_ALU_PC_G2: { // 12-bit immediate is a modified immediate made up of a 4-bit even // right rotation and 8-bit constant. After the rotation the value // is zero-extended. When bit 23 is set the instruction is an add, when @@ -825,13 +859,25 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2); return (instr & 0x00400000) ? -val : val; } - case R_ARM_LDR_PC_G0: { + case R_ARM_LDR_PC_G0: + case R_ARM_LDR_PC_G1: + case R_ARM_LDR_PC_G2: { // ADR (literal) add = bit23, sub = bit22 // LDR (literal) u = bit23 unsigned imm12 bool u = read32le(buf) & 0x00800000; uint32_t imm12 = read32le(buf) & 0xfff; return u ? imm12 : -imm12; } + case R_ARM_LDRS_PC_G0: + case R_ARM_LDRS_PC_G1: + case R_ARM_LDRS_PC_G2: { + // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8 + uint32_t opcode = read32le(buf); + bool u = opcode & 0x00800000; + uint32_t imm4l = opcode & 0xf; + uint32_t imm4h = (opcode & 0xf00) >> 4; + return u ? (imm4h | imm4l) : -(imm4h | imm4l); + } case R_ARM_THM_ALU_PREL_11_0: { // Thumb2 ADR, which is an alias for a sub or add instruction with an // unsigned immediate. 
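The getImplicitAddend cases above decode the R_ARM_ALU_PC_G* addend as an ARM "modified immediate": an 8-bit constant rotated right by twice the 4-bit rotate field. A self-contained sketch of that decode (rotr32 is restated locally so the example compiles on its own):

#include <cassert>
#include <cstdint>

// Rotate right, as in the rotr32 helper shown above (minus its assert).
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  return (val >> amt) | (val << ((32 - amt) & 31));
}

// Decode the low 12 bits of an ARM data-processing instruction:
// 8-bit constant rotated right by 2 * (4-bit rotate field).
static uint32_t decodeModifiedImm(uint32_t instr) {
  return rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
}

int main() {
  // rotate field = 4 -> rotate right by 8: 0xff becomes 0xff000000.
  assert(decodeModifiedImm(0x4ff) == 0xff000000u);
  // rotate field = 0 -> the constant is used as-is.
  assert(decodeModifiedImm(0x0ab) == 0xabu);
}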
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 300ca675519f..c33bd935f363 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -146,7 +146,7 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_IE_GOT_32_6_X: case R_HEX_IE_GOT_HI16: case R_HEX_IE_GOT_LO16: - config->hasStaticTlsModel = true; + config->hasTlsIe = true; return R_GOTPLT; case R_HEX_TPREL_11_X: case R_HEX_TPREL_16: diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 5ee9e4185f1a..a0ea403e241d 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -261,7 +261,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TLS_GD_HI20: return R_TLSGD_PC; case R_RISCV_TLS_GOT_HI20: - config->hasStaticTlsModel = true; + config->hasTlsIe = true; return R_GOT_PC; case R_RISCV_TPREL_HI20: case R_RISCV_TPREL_LO12_I: diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index 5d34b769e80e..2560dc883257 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -78,13 +78,8 @@ int X86::getTlsGdRelaxSkip(RelType type) const { RelExpr X86::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - // There are 4 different TLS variable models with varying degrees of - // flexibility and performance. LocalExec and InitialExec models are fast but - // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the - // dynamic section to let runtime know about that. - if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE || - type == R_386_TLS_GOTIE) - config->hasStaticTlsModel = true; + if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE) + config->hasTlsIe = true; switch (type) { case R_386_8: diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 40436752399b..614b5ed59218 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -99,7 +99,11 @@ X86_64::X86_64() { defaultImageBase = 0x200000; } -int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +int X86_64::getTlsGdRelaxSkip(RelType type) const { + // TLSDESC relocations are processed separately. See relaxTlsGdToLe below. + return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 1 + : 2; +} // Opcodes for the different X86_64 jmp instructions. enum JmpInsnOpcode : uint32_t { @@ -314,7 +318,7 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { if (type == R_X86_64_GOTTPOFF) - config->hasStaticTlsModel = true; + config->hasTlsIe = true; switch (type) { case R_X86_64_8: @@ -443,24 +447,24 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, // The original code used a pc relative relocation and so we have to // compensate for the -4 in had in the addend. write32le(loc + 8, val + 4); - } else { - // Convert - // lea x@tlsgd(%rip), %rax - // call *(%rax) - // to the following two instructions. - assert(rel.type == R_X86_64_GOTPC32_TLSDESC); - if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { - error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " - "in callq *x@tlsdesc(%rip), %rax"); + } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) { + // Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG. 
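A standalone sketch of the byte rewrite that the comment above describes (the same three stores relaxTlsGdToLe performs just below), assuming the common case where the leaq targets %rax, i.e. the bytes 48 8d 05 followed by a 32-bit displacement:

#include <cstdint>
#include <cstdio>

int main() {
  // leaq x@tlsdesc(%rip), %rax : REX.W prefix, opcode 8d, ModRM 05, disp32.
  uint8_t buf[7] = {0x48, 0x8d, 0x05, 0, 0, 0, 0};
  uint8_t *loc = buf + 3; // the relocation points at the 32-bit displacement

  // Turn the address load into "movq $x@tpoff, %REG".
  loc[-3] = 0x48 | ((loc[-3] >> 2) & 1); // fold REX.R into REX.B
  loc[-2] = 0xc7;                        // movq $imm32, r/m64
  loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7); // ModRM: register-direct destination

  std::printf("%02x %02x %02x\n", buf[0], buf[1], buf[2]); // prints "48 c7 c0"
}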
+ if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d || + (loc[-1] & 0xc7) != 0x05) { + errorOrWarn(getErrorLocation(loc - 3) + + "R_X86_64_GOTPC32_TLSDESC must be used " + "in leaq x@tlsdesc(%rip), %REG"); return; } - // movq $x@tpoff(%rip),%rax + loc[-3] = 0x48 | ((loc[-3] >> 2) & 1); loc[-2] = 0xc7; - loc[-1] = 0xc0; + loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7); write32le(loc, val + 4); - // xchg ax,ax - loc[4] = 0x66; - loc[5] = 0x90; + } else { + // Convert call *x@tlsdesc(%REG) to xchg ax, ax. + assert(rel.type == R_X86_64_TLSDESC_CALL); + loc[0] = 0x66; + loc[1] = 0x90; } } @@ -484,23 +488,23 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, // Both code sequences are PC relatives, but since we are moving the // constant forward by 8 bytes we have to subtract the value by 8. write32le(loc + 8, val - 8); - } else { - // Convert - // lea x@tlsgd(%rip), %rax - // call *(%rax) - // to the following two instructions. + } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) { + // Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG. assert(rel.type == R_X86_64_GOTPC32_TLSDESC); - if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { - error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " - "in callq *x@tlsdesc(%rip), %rax"); + if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d || + (loc[-1] & 0xc7) != 0x05) { + errorOrWarn(getErrorLocation(loc - 3) + + "R_X86_64_GOTPC32_TLSDESC must be used " + "in leaq x@tlsdesc(%rip), %REG"); return; } - // movq x@gottpoff(%rip),%rax loc[-2] = 0x8b; write32le(loc, val); - // xchg ax,ax - loc[4] = 0x66; - loc[5] = 0x90; + } else { + // Convert call *x@tlsdesc(%rax) to xchg ax, ax. + assert(rel.type == R_X86_64_TLSDESC_CALL); + loc[0] = 0x66; + loc[1] = 0x90; } } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 79c4fe06d7b2..c660a8e67c21 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -261,7 +261,7 @@ struct Configuration { UnresolvedPolicy unresolvedSymbols; UnresolvedPolicy unresolvedSymbolsInShlib; Target2Policy target2; - bool Power10Stub; + bool power10Stubs; ARMVFPArgKind armVFPArgs = ARMVFPArgKind::Default; BuildIdKind buildId = BuildIdKind::None; SeparateSegmentKind zSeparate; @@ -309,19 +309,10 @@ struct Configuration { // if that's true.) bool isMips64EL; - // True if we need to set the DF_STATIC_TLS flag to an output file, - // which works as a hint to the dynamic loader that the file contains - // code compiled with the static TLS model. The thread-local variable - // compiled with the static TLS model is faster but less flexible, and - // it may not be loaded using dlopen(). - // - // We set this flag to true when we see a relocation for the static TLS - // model. Once this becomes true, it will never become false. - // - // Since the flag is updated by multi-threaded code, we use std::atomic. - // (Writing to a variable is not considered thread-safe even if the - // variable is boolean and we always set the same value from all threads.) - std::atomic<bool> hasStaticTlsModel{false}; + // True if we need to set the DF_STATIC_TLS flag to an output file, which + // works as a hint to the dynamic loader that the shared object contains code + // compiled with the initial-exec TLS model. + bool hasTlsIe = false; // Holds set of ELF header flags for the target. 
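The Config.h comment above ties hasTlsIe / DF_STATIC_TLS to code built with the initial-exec TLS model. For orientation only, this is the kind of source that produces R_X86_64_GOTTPOFF when compiled with -fPIC; the attribute spelling is the GCC/Clang extension, and the file name and symbol are made up:

// ie.cpp -- accesses to this variable use the initial-exec model and emit
// R_X86_64_GOTTPOFF relocations, which is what flips hasTlsIe in the linker.
__attribute__((tls_model("initial-exec"))) static thread_local int counter = 0;

int bump() { return ++counter; }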
uint32_t eflags = 0; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 9fac04558c46..1376e6c2c253 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -460,19 +460,21 @@ static bool isKnownZFlag(StringRef s) { s.startswith("start-stop-visibility="); } -// Report an error for an unknown -z option. +// Report a warning for an unknown -z option. static void checkZOptions(opt::InputArgList &args) { for (auto *arg : args.filtered(OPT_z)) if (!isKnownZFlag(arg->getValue())) - error("unknown -z value: " + StringRef(arg->getValue())); + warn("unknown -z value: " + StringRef(arg->getValue())); } void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ELFOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); - // Interpret this flag early because error() depends on them. + // Interpret the flags early because error()/warn() depend on them. errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20); + errorHandler().fatalWarnings = + args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); checkZOptions(args); // Handle -help @@ -750,20 +752,6 @@ static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &args) { return OrphanHandlingPolicy::Place; } -// Parses --power10-stubs= flags, to disable or enable Power 10 -// instructions in stubs. -static bool getP10StubOpt(opt::InputArgList &args) { - - if (args.getLastArgValue(OPT_power10_stubs_eq)== "no") - return false; - - if (!args.hasArg(OPT_power10_stubs_eq) && - args.hasArg(OPT_no_power10_stubs)) - return false; - - return true; -} - // Parse --build-id or --build-id=<style>. We handle "tree" as a // synonym for "sha1" because all our hash functions including // --build-id=sha1 are actually tree hashes for performance reasons. @@ -985,8 +973,6 @@ static void parseClangOption(StringRef opt, const Twine &msg) { // Initializes Config members by the command line options. static void readConfigs(opt::InputArgList &args) { errorHandler().verbose = args.hasArg(OPT_verbose); - errorHandler().fatalWarnings = - args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); errorHandler().vsDiagnostics = args.hasArg(OPT_visual_studio_diagnostics_format, false); @@ -1190,7 +1176,7 @@ static void readConfigs(opt::InputArgList &args) { config->zText = getZFlag(args, "text", "notext", true); config->zWxneeded = hasZOption(args, "wxneeded"); setUnresolvedSymbolPolicy(args); - config->Power10Stub = getP10StubOpt(args); + config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no"; if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) { if (arg->getOption().matches(OPT_eb)) @@ -1691,7 +1677,7 @@ static void handleUndefined(Symbol *sym, const char *option) { if (!sym->isLazy()) return; - sym->fetch(); + sym->extract(); if (!config->whyExtract.empty()) whyExtract.emplace_back(option, sym->file, *sym); } @@ -1706,14 +1692,12 @@ static void handleUndefinedGlob(StringRef arg) { return; } + // Calling sym->extract() in the loop is not safe because it may add new + // symbols to the symbol table, invalidating the current iterator. std::vector<Symbol *> syms; - for (Symbol *sym : symtab->symbols()) { - // Calling Sym->fetch() from here is not safe because it may - // add new symbols to the symbol table, invalidating the - // current iterator. So we just keep a note. 
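The handleUndefinedGlob change above keeps the collect-then-extract structure because extract() can add symbols to the table and invalidate the iterator being walked. A generic sketch of that pattern on a plain container (the "_undef" suffix match is a made-up stand-in for the --undefined-glob pattern):

#include <string>
#include <vector>

int main() {
  std::vector<std::string> table = {"foo", "bar_undef", "baz_undef"};
  // Collect matches first: the "extract" step below appends to `table`,
  // which would invalidate the range-for iterators if done in the same loop.
  std::vector<std::string> pending;
  for (const std::string &name : table)
    if (name.size() >= 6 && name.compare(name.size() - 6, 6, "_undef") == 0)
      pending.push_back(name);
  for (const std::string &name : pending)
    table.push_back("extracted:" + name); // stand-in for sym->extract()
  return 0; // table now holds 5 entries
}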
+ for (Symbol *sym : symtab->symbols()) if (pat->match(sym->getName())) syms.push_back(sym); - } for (Symbol *sym : syms) handleUndefined(sym, "--undefined-glob"); @@ -1731,7 +1715,7 @@ static void handleLibcall(StringRef name) { mb = cast<LazyArchive>(sym)->getMemberBuffer(); if (isBitcode(mb)) - sym->fetch(); + sym->extract(); } // Handle --dependency-file=<path>. If that option is given, lld creates a @@ -2207,7 +2191,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { symtab->insert(arg->getValue())->traced = true; // Handle -u/--undefined before input files. If both a.a and b.so define foo, - // -u foo a.a b.so will fetch a.a. + // -u foo a.a b.so will extract a.a. for (StringRef name : config->undefined) addUnusedUndefined(name)->referenced = true; @@ -2297,7 +2281,6 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Create elfHeader early. We need a dummy section in // addReservedSymbols to mark the created symbols as not absolute. Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC); - Out::elfHeader->size = sizeof(typename ELFT::Ehdr); std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args); @@ -2476,8 +2459,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // merging MergeInputSections into a single MergeSyntheticSection. From this // point onwards InputSectionDescription::sections should be used instead of // sectionBases. - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : script->sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) sec->finalizeInputSections(); llvm::erase_if(inputSections, [](InputSectionBase *s) { return isa<MergeInputSection>(s); diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index c13969806916..0ec748e8f990 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -552,10 +552,10 @@ template <class ELFT> void ICF<ELFT>::run() { // InputSectionDescription::sections is populated by processSectionCommands(). // ICF may fold some input sections assigned to output sections. Remove them. - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) - for (BaseCommand *sub_base : sec->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(sub_base)) + for (SectionCommand *cmd : script->sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) + for (SectionCommand *subCmd : sec->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(subCmd)) llvm::erase_if(isd->sections, [](InputSection *isec) { return !isec->isLive(); }); } diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index e8a4188ec775..031a8679db41 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -395,16 +395,6 @@ uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { this); } -template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() { - if (this->symbols.empty()) - return {}; - return makeArrayRef(this->symbols).slice(1, this->firstGlobal - 1); -} - -template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() { - return makeArrayRef(this->symbols).slice(this->firstGlobal); -} - template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { // Read a section table. justSymbols is usually false. if (this->justSymbols) @@ -966,7 +956,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, // `nullptr` for the normal case. 
However, if -r or --emit-relocs is // specified, we need to copy them to the output. (Some post link analysis // tools specify --emit-relocs to obtain the information.) - if (!config->relocatable && !config->emitRelocs) + if (!config->copyRelocs) return nullptr; InputSection *relocSec = make<InputSection>(*this, sec, name); // If the relocated section is discarded (due to /DISCARD/ or @@ -1035,12 +1025,11 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx") return &InputSection::discarded; - // If we are creating a new .build-id section, strip existing .build-id - // sections so that the output won't have more than one .build-id. - // This is not usually a problem because input object files normally don't - // have .build-id sections, but you can create such files by - // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it. - if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None) + // Strip existing .note.gnu.build-id sections so that the output won't have + // more than one build-id. This is not usually a problem because input object + // files normally don't have .build-id sections, but you can create such files + // by "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it. + if (name == ".note.gnu.build-id") return &InputSection::discarded; // The linker merges EH (exception handling) frames and creates a @@ -1147,17 +1136,20 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { if (sec == &InputSection::discarded) { Undefined und{this, name, binding, stOther, type, secIdx}; Symbol *sym = this->symbols[i]; - // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // !ArchiveFile::parsed or LazyObjFile::extracted means that the file // containing this object has not finished processing, i.e. this symbol is - // a result of a lazy symbol fetch. We should demote the lazy symbol to an - // Undefined so that any relocations outside of the group to it will + // a result of a lazy symbol extract. We should demote the lazy symbol to + // an Undefined so that any relocations outside of the group to it will // trigger a discarded section error. if ((sym->symbolKind == Symbol::LazyArchiveKind && !cast<ArchiveFile>(sym->file)->parsed) || (sym->symbolKind == Symbol::LazyObjectKind && - cast<LazyObjFile>(sym->file)->fetched)) + cast<LazyObjFile>(sym->file)->extracted)) { sym->replace(und); - else + // Prevent LTO from internalizing the symbol in case there is a + // reference to this symbol from this file. + sym->isUsedInRegularObj = true; + } else sym->resolve(und); continue; } @@ -1174,7 +1166,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() { } // Undefined symbols (excluding those defined relative to non-prevailing - // sections) can trigger recursive fetch. Process defined symbols first so + // sections) can trigger recursive extract. Process defined symbols first so // that the relative order between a defined symbol and an undefined symbol // does not change the symbol resolution behavior. In addition, a set of // interconnected symbols will all be resolved to the same file, instead of @@ -1202,7 +1194,7 @@ void ArchiveFile::parse() { } // Returns a buffer pointing to a member file containing a given symbol. 
-void ArchiveFile::fetch(const Archive::Symbol &sym) { +void ArchiveFile::extract(const Archive::Symbol &sym) { Archive::Child c = CHECK(sym.getMember(), toString(this) + ": could not get the member for symbol " + @@ -1291,7 +1283,7 @@ static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, } } -bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) { +bool ArchiveFile::shouldExtractForCommon(const Archive::Symbol &sym) { Archive::Child c = CHECK(sym.getMember(), toString(this) + ": could not get the member for symbol " + @@ -1779,10 +1771,10 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } } -void LazyObjFile::fetch() { - if (fetched) +void LazyObjFile::extract() { + if (extracted) return; - fetched = true; + extracted = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; @@ -1835,7 +1827,7 @@ template <class ELFT> void LazyObjFile::parse() { // Replace existing symbols with LazyObject symbols. // - // resolve() may trigger this->fetch() if an existing symbol is an + // resolve() may trigger this->extract() if an existing symbol is an // undefined symbol. If that happens, this LazyObjFile has served // its purpose, and we can exit from the loop early. for (Symbol *sym : this->symbols) { @@ -1843,16 +1835,16 @@ template <class ELFT> void LazyObjFile::parse() { continue; sym->resolve(LazyObject{*this, sym->getName()}); - // If fetched, stop iterating because this->symbols has been transferred + // If extracted, stop iterating because this->symbols has been transferred // to the instantiated ObjFile. - if (fetched) + if (extracted) return; } return; } } -bool LazyObjFile::shouldFetchForCommon(const StringRef &name) { +bool LazyObjFile::shouldExtractForCommon(const StringRef &name) { if (isBitcode(mb)) return isBitcodeNonCommonDef(mb, name, archiveName); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index fb4d46b43f35..5bbfb7656e47 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -168,6 +168,15 @@ public: StringRef getStringTable() const { return stringTable; } + ArrayRef<Symbol *> getLocalSymbols() { + if (symbols.empty()) + return {}; + return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1); + } + ArrayRef<Symbol *> getGlobalSymbols() { + return llvm::makeArrayRef(symbols).slice(firstGlobal); + } + template <typename ELFT> typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms); @@ -197,9 +206,6 @@ public: return this->ELFFileBase::getObj<ELFT>(); } - ArrayRef<Symbol *> getLocalSymbols(); - ArrayRef<Symbol *> getGlobalSymbols(); - ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) { this->archiveName = std::string(archiveName); } @@ -306,13 +312,13 @@ public: static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } template <class ELFT> void parse(); - void fetch(); + void extract(); - // Check if a non-common symbol should be fetched to override a common + // Check if a non-common symbol should be extracted to override a common // definition. - bool shouldFetchForCommon(const StringRef &name); + bool shouldExtractForCommon(const StringRef &name); - bool fetched = false; + bool extracted = false; private: uint64_t offsetInArchive; @@ -329,14 +335,14 @@ public: // returns it. If the same file was instantiated before, this // function does nothing (so we don't instantiate the same file // more than once.) 
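getLocalSymbols()/getGlobalSymbols(), moved inline in the InputFiles.h hunk above, simply slice the symbol array around firstGlobal, with index 0 reserved for the null symbol. A small sketch of those slice bounds using plain ints in place of Symbol* (assumes LLVM's ADT headers are on the include path):

#include "llvm/ADT/ArrayRef.h"
#include <vector>

int main() {
  std::vector<int> symbols = {0, 11, 12, 21, 22, 23}; // [0]=null, then locals, then globals
  size_t firstGlobal = 3;
  llvm::ArrayRef<int> locals = llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1);
  llvm::ArrayRef<int> globals = llvm::makeArrayRef(symbols).slice(firstGlobal);
  return locals.size() == 2 && globals.size() == 3 ? 0 : 1;
}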
- void fetch(const Archive::Symbol &sym); + void extract(const Archive::Symbol &sym); - // Check if a non-common symbol should be fetched to override a common + // Check if a non-common symbol should be extracted to override a common // definition. - bool shouldFetchForCommon(const Archive::Symbol &sym); + bool shouldExtractForCommon(const Archive::Symbol &sym); size_t getMemberCount() const; - size_t getFetchedMemberCount() const { return seen.size(); } + size_t getExtractedMemberCount() const { return seen.size(); } bool parsed = false; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 74d4dd309c79..4d5bd1f1e5f2 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -187,7 +187,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const { } case Regular: case Synthetic: - return cast<InputSection>(this)->getOffset(offset); + return cast<InputSection>(this)->outSecOff + offset; case EHFrame: // The file crtbeginT.o has relocations pointing to the start of an empty // .eh_frame that is known to be the first in the link. It does that to @@ -196,7 +196,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const { case Merge: const MergeInputSection *ms = cast<MergeInputSection>(this); if (InputSection *isec = ms->getParent()) - return isec->getOffset(ms->getParentOffset(offset)); + return isec->outSecOff + ms->getParentOffset(offset); return ms->getParentOffset(offset); } llvm_unreachable("invalid section kind"); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 4bd1f410e388..7ddc43916a0f 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -78,7 +78,7 @@ public: // These corresponds to the fields in Elf_Shdr. uint32_t alignment; uint64_t flags; - uint64_t entsize; + uint32_t entsize; uint32_t type; uint32_t link; uint32_t info; @@ -99,9 +99,9 @@ public: void markDead() { partition = 0; } protected: - SectionBase(Kind sectionKind, StringRef name, uint64_t flags, - uint64_t entsize, uint64_t alignment, uint32_t type, - uint32_t info, uint32_t link) + constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, + uint32_t entsize, uint32_t alignment, uint32_t type, + uint32_t info, uint32_t link) : name(name), repl(this), sectionKind(sectionKind), bss(false), keepUnique(false), partition(0), alignment(alignment), flags(flags), entsize(entsize), type(type), link(link), info(info) {} @@ -121,14 +121,14 @@ public: static bool classof(const SectionBase *s) { return s->kind() != Output; } - // Section index of the relocation section if exists. - uint32_t relSecIdx = 0; - // The file which contains this section. Its dynamic type is always // ObjFile<ELFT>, but in order to avoid ELFT, we use InputFile as // its static type. InputFile *file; + // Section index of the relocation section if exists. + uint32_t relSecIdx = 0; + template <class ELFT> ObjFile<ELFT> *getFile() const { return cast_or_null<ObjFile<ELFT>>(file); } @@ -352,8 +352,6 @@ public: // beginning of the output section. template <class ELFT> void writeTo(uint8_t *buf); - uint64_t getOffset(uint64_t offset) const { return outSecOff + offset; } - OutputSection *getParent() const; // This variable has two usages. Initially, it represents an index in the diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index a42d216e4e77..46dc77a6789c 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -279,7 +279,7 @@ void BitcodeCompiler::add(BitcodeFile &f) { // distributed build system that depends on that behavior. 
static void thinLTOCreateEmptyIndexFiles() { for (LazyObjFile *f : lazyObjFiles) { - if (f->fetched || !isBitcode(f->mb)) + if (f->extracted || !isBitcode(f->mb)) continue; std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName())); std::unique_ptr<raw_fd_ostream> os = openFile(path + ".thinlto.bc"); diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index f332b03d757d..cf4da7ab54c9 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -49,23 +49,76 @@ using namespace lld::elf; LinkerScript *elf::script; -static uint64_t getOutputSectionVA(SectionBase *sec) { - OutputSection *os = sec->getOutputSection(); - assert(os && "input section has no output section assigned"); - return os ? os->addr : 0; +static bool isSectionPrefix(StringRef prefix, StringRef name) { + return name.startswith(prefix) || name == prefix.drop_back(); +} + +static StringRef getOutputSectionName(const InputSectionBase *s) { + if (config->relocatable) + return s->name; + + // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want + // to emit .rela.text.foo as .rela.text.bar for consistency (this is not + // technically required, but not doing it is odd). This code guarantees that. + if (auto *isec = dyn_cast<InputSection>(s)) { + if (InputSectionBase *rel = isec->getRelocatedSection()) { + OutputSection *out = rel->getOutputSection(); + if (s->type == SHT_RELA) + return saver.save(".rela" + out->name); + return saver.save(".rel" + out->name); + } + } + + // A BssSection created for a common symbol is identified as "COMMON" in + // linker scripts. It should go to .bss section. + if (s->name == "COMMON") + return ".bss"; + + if (script->hasSectionsCommand) + return s->name; + + // When no SECTIONS is specified, emulate GNU ld's internal linker scripts + // by grouping sections with certain prefixes. + + // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.", + // ".text.unlikely.", ".text.startup." or ".text.exit." before others. + // We provide an option -z keep-text-section-prefix to group such sections + // into separate output sections. This is more flexible. See also + // sortISDBySectionOrder(). + // ".text.unknown" means the hotness of the section is unknown. When + // SampleFDO is used, if a function doesn't have sample, it could be very + // cold or it could be a new function never being sampled. Those functions + // will be kept in the ".text.unknown" section. + // ".text.split." holds symbols which are split out from functions in other + // input sections. For example, with -fsplit-machine-functions, placing the + // cold parts in .text.split instead of .text.unlikely mitigates against poor + // profile inaccuracy. Techniques such as hugepage remapping can make + // conservative decisions at the section granularity. 
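isSectionPrefix(), introduced at the top of this LinkerScript.cpp hunk, treats a section name as matching either the dotted prefix or the prefix with its trailing dot removed. A standalone restatement with std::string_view in place of StringRef, kept deliberately close to the startswith/drop_back calls in the diff:

#include <cassert>
#include <string_view>

static bool isSectionPrefix(std::string_view prefix, std::string_view name) {
  // Equivalent to name.startswith(prefix) || name == prefix.drop_back().
  return name.substr(0, prefix.size()) == prefix ||
         name == prefix.substr(0, prefix.size() - 1);
}

int main() {
  assert(isSectionPrefix(".text.", ".text.foo")); // grouped into ".text"
  assert(isSectionPrefix(".text.", ".text"));     // the bare section name also matches
  assert(!isSectionPrefix(".text.", ".textfoo")); // but ".textfoo" does not
}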
+ if (config->zKeepTextSectionPrefix) + for (StringRef v : {".text.hot.", ".text.unknown.", ".text.unlikely.", + ".text.startup.", ".text.exit.", ".text.split."}) + if (isSectionPrefix(v, s->name)) + return v.drop_back(); + + for (StringRef v : + {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", + ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", + ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) + if (isSectionPrefix(v, s->name)) + return v.drop_back(); + + return s->name; } uint64_t ExprValue::getValue() const { if (sec) - return alignTo(sec->getOffset(val) + getOutputSectionVA(sec), + return alignTo(sec->getOutputSection()->addr + sec->getOffset(val), alignment); return alignTo(val, alignment); } uint64_t ExprValue::getSecAddr() const { - if (sec) - return sec->getOffset(0) + getOutputSectionVA(sec); - return 0; + return sec ? sec->getOutputSection()->addr + sec->getOffset(0) : 0; } uint64_t ExprValue::getSectionOffset() const { @@ -102,23 +155,22 @@ OutputSection *LinkerScript::getOrCreateOutputSection(StringRef name) { // Expands the memory region by the specified size. static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size, - StringRef regionName, StringRef secName) { + StringRef secName) { memRegion->curPos += size; uint64_t newSize = memRegion->curPos - (memRegion->origin)().getValue(); uint64_t length = (memRegion->length)().getValue(); if (newSize > length) - error("section '" + secName + "' will not fit in region '" + regionName + - "': overflowed by " + Twine(newSize - length) + " bytes"); + error("section '" + secName + "' will not fit in region '" + + memRegion->name + "': overflowed by " + Twine(newSize - length) + + " bytes"); } void LinkerScript::expandMemoryRegions(uint64_t size) { if (ctx->memRegion) - expandMemoryRegion(ctx->memRegion, size, ctx->memRegion->name, - ctx->outSec->name); + expandMemoryRegion(ctx->memRegion, size, ctx->outSec->name); // Only expand the LMARegion if it is different from memRegion. if (ctx->lmaRegion && ctx->memRegion != ctx->lmaRegion) - expandMemoryRegion(ctx->lmaRegion, size, ctx->lmaRegion->name, - ctx->outSec->name); + expandMemoryRegion(ctx->lmaRegion, size, ctx->outSec->name); } void LinkerScript::expandOutputSection(uint64_t size) { @@ -215,21 +267,21 @@ using SymbolAssignmentMap = // Collect section/value pairs of linker-script-defined symbols. This is used to // check whether symbol values converge. -static SymbolAssignmentMap -getSymbolAssignmentValues(const std::vector<BaseCommand *> §ionCommands) { +static SymbolAssignmentMap getSymbolAssignmentValues( + const std::vector<SectionCommand *> §ionCommands) { SymbolAssignmentMap ret; - for (BaseCommand *base : sectionCommands) { - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - if (cmd->sym) // sym is nullptr for dot. - ret.try_emplace(cmd->sym, - std::make_pair(cmd->sym->section, cmd->sym->value)); + for (SectionCommand *cmd : sectionCommands) { + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) { + if (assign->sym) // sym is nullptr for dot. 
+ ret.try_emplace(assign->sym, std::make_pair(assign->sym->section, + assign->sym->value)); continue; } - for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) - if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) - if (cmd->sym) - ret.try_emplace(cmd->sym, - std::make_pair(cmd->sym->section, cmd->sym->value)); + for (SectionCommand *subCmd : cast<OutputSection>(cmd)->commands) + if (auto *assign = dyn_cast<SymbolAssignment>(subCmd)) + if (assign->sym) + ret.try_emplace(assign->sym, std::make_pair(assign->sym->section, + assign->sym->value)); } return ret; } @@ -256,9 +308,9 @@ void LinkerScript::processInsertCommands() { for (StringRef name : cmd.names) { // If base is empty, it may have been discarded by // adjustSectionsBeforeSorting(). We do not handle such output sections. - auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) { - return isa<OutputSection>(base) && - cast<OutputSection>(base)->name == name; + auto from = llvm::find_if(sectionCommands, [&](SectionCommand *subCmd) { + return isa<OutputSection>(subCmd) && + cast<OutputSection>(subCmd)->name == name; }); if (from == sectionCommands.end()) continue; @@ -266,10 +318,11 @@ void LinkerScript::processInsertCommands() { sectionCommands.erase(from); } - auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) { - auto *to = dyn_cast<OutputSection>(base); - return to != nullptr && to->name == cmd.where; - }); + auto insertPos = + llvm::find_if(sectionCommands, [&cmd](SectionCommand *subCmd) { + auto *to = dyn_cast<OutputSection>(subCmd); + return to != nullptr && to->name == cmd.where; + }); if (insertPos == sectionCommands.end()) { error("unable to insert " + cmd.names[0] + (cmd.isAfter ? " after " : " before ") + cmd.where); @@ -287,9 +340,9 @@ void LinkerScript::processInsertCommands() { // over symbol assignment commands and create placeholder symbols if needed. void LinkerScript::declareSymbols() { assert(!ctx); - for (BaseCommand *base : sectionCommands) { - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - declareSymbol(cmd); + for (SectionCommand *cmd : sectionCommands) { + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) { + declareSymbol(assign); continue; } @@ -297,12 +350,12 @@ void LinkerScript::declareSymbols() { // we can't say for sure if it is going to be included or not. // Skip such sections for now. Improve the checks if we ever // need symbols from that sections to be declared early. 
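For reference, the overflow check behind expandMemoryRegion() earlier in this LinkerScript.cpp diff reduces to the following sketch (simplified fields; the real MemoryRegion evaluates origin and length through Expr callbacks and reports through error()):

  #include <cstdint>
  #include <cstdio>

  struct Region { uint64_t origin, length, curPos; };  // illustrative only

  void expand(Region &r, uint64_t size, const char *secName) {
    r.curPos += size;
    uint64_t newSize = r.curPos - r.origin;
    if (newSize > r.length)
      std::printf("section '%s' will not fit in region: overflowed by %llu bytes\n",
                  secName, (unsigned long long)(newSize - r.length));
  }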
- auto *sec = cast<OutputSection>(base); + auto *sec = cast<OutputSection>(cmd); if (sec->constraint != ConstraintKind::NoConstraint) continue; - for (BaseCommand *base2 : sec->sectionCommands) - if (auto *cmd = dyn_cast<SymbolAssignment>(base2)) - declareSymbol(cmd); + for (SectionCommand *cmd : sec->commands) + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) + declareSymbol(assign); } } @@ -528,10 +581,10 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) { continue; std::vector<InputSectionBase *> secs(part.armExidx->exidxSections.begin(), part.armExidx->exidxSections.end()); - for (BaseCommand *base : outCmd.sectionCommands) - if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { + for (SectionCommand *cmd : outCmd.commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) { std::vector<InputSectionBase *> matches = - computeInputSections(cmd, secs); + computeInputSections(isd, secs); for (InputSectionBase *s : matches) discard(s); } @@ -542,12 +595,12 @@ std::vector<InputSectionBase *> LinkerScript::createInputSectionList(OutputSection &outCmd) { std::vector<InputSectionBase *> ret; - for (BaseCommand *base : outCmd.sectionCommands) { - if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { - cmd->sectionBases = computeInputSections(cmd, inputSections); - for (InputSectionBase *s : cmd->sectionBases) + for (SectionCommand *cmd : outCmd.commands) { + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) { + isd->sectionBases = computeInputSections(isd, inputSections); + for (InputSectionBase *s : isd->sectionBases) s->parent = &outCmd; - ret.insert(ret.end(), cmd->sectionBases.begin(), cmd->sectionBases.end()); + ret.insert(ret.end(), isd->sectionBases.begin(), isd->sectionBases.end()); } } return ret; @@ -564,7 +617,7 @@ void LinkerScript::processSectionCommands() { for (InputSectionBase *s : v) discard(s); discardSynthetic(*osec); - osec->sectionCommands.clear(); + osec->commands.clear(); return false; } @@ -578,7 +631,7 @@ void LinkerScript::processSectionCommands() { if (!matchConstraints(v, osec->constraint)) { for (InputSectionBase *s : v) s->parent = nullptr; - osec->sectionCommands.clear(); + osec->commands.clear(); return false; } @@ -605,7 +658,7 @@ void LinkerScript::processSectionCommands() { for (OutputSection *osec : overwriteSections) if (process(osec) && !map.try_emplace(osec->name, osec).second) warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name); - for (BaseCommand *&base : sectionCommands) + for (SectionCommand *&base : sectionCommands) if (auto *osec = dyn_cast<OutputSection>(base)) { if (OutputSection *overwrite = map.lookup(osec->name)) { log(overwrite->location + " overwrites " + osec->name); @@ -639,22 +692,22 @@ void LinkerScript::processSymbolAssignments() { ctx = &state; ctx->outSec = aether; - for (BaseCommand *base : sectionCommands) { - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) - addSymbol(cmd); + for (SectionCommand *cmd : sectionCommands) { + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) + addSymbol(assign); else - for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) - if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) - addSymbol(cmd); + for (SectionCommand *subCmd : cast<OutputSection>(cmd)->commands) + if (auto *assign = dyn_cast<SymbolAssignment>(subCmd)) + addSymbol(assign); } ctx = nullptr; } -static OutputSection *findByName(ArrayRef<BaseCommand *> vec, +static OutputSection *findByName(ArrayRef<SectionCommand *> vec, StringRef name) { - for (BaseCommand *base : vec) 
- if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : vec) + if (auto *sec = dyn_cast<OutputSection>(cmd)) if (sec->name == name) return sec; return nullptr; @@ -753,8 +806,7 @@ addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, // end up being linked to the same output section. The casts are fine // because everything in the map was created by the orphan placement code. auto *firstIsec = cast<InputSectionBase>( - cast<InputSectionDescription>(sec->sectionCommands[0]) - ->sectionBases[0]); + cast<InputSectionDescription>(sec->commands[0])->sectionBases[0]); OutputSection *firstIsecOut = firstIsec->flags & SHF_LINK_ORDER ? firstIsec->getLinkOrderDep()->getOutputSection() @@ -848,38 +900,6 @@ void LinkerScript::diagnoseOrphanHandling() const { } } -uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) { - dot = alignTo(dot, alignment) + size; - return dot; -} - -void LinkerScript::output(InputSection *s) { - assert(ctx->outSec == s->getParent()); - uint64_t before = advance(0, 1); - uint64_t pos = advance(s->getSize(), s->alignment); - s->outSecOff = pos - s->getSize() - ctx->outSec->addr; - - // Update output section size after adding each section. This is so that - // SIZEOF works correctly in the case below: - // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } - expandOutputSection(pos - before); -} - -void LinkerScript::switchTo(OutputSection *sec) { - ctx->outSec = sec; - - uint64_t pos = advance(0, 1); - if (sec->addrExpr && script->hasSectionsCommand) { - // The alignment is ignored. - ctx->outSec->addr = pos; - } else { - // ctx->outSec->alignment is the max of ALIGN and the maximum of input - // section alignments. - ctx->outSec->addr = advance(0, ctx->outSec->alignment); - expandMemoryRegions(ctx->outSec->addr - pos); - } -} - // This function searches for a memory region to place the given output // section in. If found, a pointer to the appropriate memory region is // returned in the first member of the pair. Otherwise, a nullptr is returned. @@ -917,7 +937,7 @@ LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) { // See if a region can be found by matching section flags. for (auto &pair : memoryRegions) { MemoryRegion *m = pair.second; - if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0) + if (m->compatibleWith(sec->flags)) return {m, nullptr}; } @@ -965,10 +985,21 @@ void LinkerScript::assignOffsets(OutputSection *sec) { // between the previous section, if any, and the start of this section. if (ctx->memRegion && ctx->memRegion->curPos < dot) expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, - ctx->memRegion->name, sec->name); + sec->name); } - switchTo(sec); + ctx->outSec = sec; + if (sec->addrExpr && script->hasSectionsCommand) { + // The alignment is ignored. + sec->addr = dot; + } else { + // sec->alignment is the max of ALIGN and the maximum of input + // section alignments. + const uint64_t pos = dot; + dot = alignTo(dot, sec->alignment); + sec->addr = dot; + expandMemoryRegions(dot - pos); + } // ctx->lmaOffset is LMA minus VMA. 
If LMA is explicitly specified via AT() or // AT>, recompute ctx->lmaOffset; otherwise, if both previous/current LMA @@ -981,14 +1012,14 @@ void LinkerScript::assignOffsets(OutputSection *sec) { } else if (MemoryRegion *mr = sec->lmaRegion) { uint64_t lmaStart = alignTo(mr->curPos, sec->alignment); if (mr->curPos < lmaStart) - expandMemoryRegion(mr, lmaStart - mr->curPos, mr->name, sec->name); + expandMemoryRegion(mr, lmaStart - mr->curPos, sec->name); ctx->lmaOffset = lmaStart - dot; } else if (!sameMemRegion || !prevLMARegionIsDefault) { ctx->lmaOffset = 0; } // Propagate ctx->lmaOffset to the first "non-header" section. - if (PhdrEntry *l = ctx->outSec->ptLoad) + if (PhdrEntry *l = sec->ptLoad) if (sec == findFirstSection(l)) l->lmaOffset = ctx->lmaOffset; @@ -999,28 +1030,38 @@ void LinkerScript::assignOffsets(OutputSection *sec) { // We visited SectionsCommands from processSectionCommands to // layout sections. Now, we visit SectionsCommands again to fix // section offsets. - for (BaseCommand *base : sec->sectionCommands) { + for (SectionCommand *cmd : sec->commands) { // This handles the assignments to symbol or to the dot. - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - cmd->addr = dot; - assignSymbol(cmd, true); - cmd->size = dot - cmd->addr; + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) { + assign->addr = dot; + assignSymbol(assign, true); + assign->size = dot - assign->addr; continue; } // Handle BYTE(), SHORT(), LONG(), or QUAD(). - if (auto *cmd = dyn_cast<ByteCommand>(base)) { - cmd->offset = dot - ctx->outSec->addr; - dot += cmd->size; - expandOutputSection(cmd->size); + if (auto *data = dyn_cast<ByteCommand>(cmd)) { + data->offset = dot - sec->addr; + dot += data->size; + expandOutputSection(data->size); continue; } // Handle a single input section description command. // It calculates and assigns the offsets for each section and also // updates the output section size. - for (InputSection *sec : cast<InputSectionDescription>(base)->sections) - output(sec); + for (InputSection *isec : cast<InputSectionDescription>(cmd)->sections) { + assert(isec->getParent() == sec); + const uint64_t pos = dot; + dot = alignTo(dot, isec->alignment); + isec->outSecOff = dot - sec->addr; + dot += isec->getSize(); + + // Update output section size after adding each section. This is so that + // SIZEOF works correctly in the case below: + // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } + expandOutputSection(dot - pos); + } } // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections @@ -1050,14 +1091,14 @@ static bool isDiscardable(const OutputSection &sec) { if (sec.usedInExpression) return false; - for (BaseCommand *base : sec.sectionCommands) { - if (auto cmd = dyn_cast<SymbolAssignment>(base)) + for (SectionCommand *cmd : sec.commands) { + if (auto assign = dyn_cast<SymbolAssignment>(cmd)) // Don't create empty output sections just for unreferenced PROVIDE // symbols. - if (cmd->name != "." && !cmd->sym) + if (assign->name != "." && !assign->sym) continue; - if (!isa<InputSectionDescription>(*base)) + if (!isa<InputSectionDescription>(*cmd)) return false; } return true; @@ -1104,7 +1145,7 @@ void LinkerScript::adjustSectionsBeforeSorting() { uint64_t flags = SHF_ALLOC; std::vector<StringRef> defPhdrs; - for (BaseCommand *&cmd : sectionCommands) { + for (SectionCommand *&cmd : sectionCommands) { auto *sec = dyn_cast<OutputSection>(cmd); if (!sec) continue; @@ -1150,14 +1191,14 @@ void LinkerScript::adjustSectionsBeforeSorting() { // clutter the output. 
// We instead remove trivially empty sections. The bfd linker seems even // more aggressive at removing them. - llvm::erase_if(sectionCommands, [&](BaseCommand *base) { return !base; }); + llvm::erase_if(sectionCommands, [&](SectionCommand *cmd) { return !cmd; }); } void LinkerScript::adjustSectionsAfterSorting() { // Try and find an appropriate memory region to assign offsets in. MemoryRegion *hint = nullptr; - for (BaseCommand *base : sectionCommands) { - if (auto *sec = dyn_cast<OutputSection>(base)) { + for (SectionCommand *cmd : sectionCommands) { + if (auto *sec = dyn_cast<OutputSection>(cmd)) { if (!sec->lmaRegionName.empty()) { if (MemoryRegion *m = memoryRegions.lookup(sec->lmaRegionName)) sec->lmaRegion = m; @@ -1183,8 +1224,8 @@ void LinkerScript::adjustSectionsAfterSorting() { // Walk the commands and propagate the program headers to commands that don't // explicitly specify them. - for (BaseCommand *base : sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) maybePropagatePhdrs(*sec, defPhdrs); } @@ -1267,20 +1308,20 @@ const Defined *LinkerScript::assignAddresses() { dot += getHeaderSize(); } - auto deleter = std::make_unique<AddressState>(); - ctx = deleter.get(); + AddressState state; + ctx = &state; errorOnMissingSection = true; - switchTo(aether); + ctx->outSec = aether; SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands); - for (BaseCommand *base : sectionCommands) { - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - cmd->addr = dot; - assignSymbol(cmd, false); - cmd->size = dot - cmd->addr; + for (SectionCommand *cmd : sectionCommands) { + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) { + assign->addr = dot; + assignSymbol(assign, false); + assign->size = dot - assign->addr; continue; } - assignOffsets(cast<OutputSection>(base)); + assignOffsets(cast<OutputSection>(cmd)); } ctx = nullptr; diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index b366da4f274e..badc4d126be8 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -41,7 +41,7 @@ class ThunkSection; struct ExprValue { ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, const Twine &loc) - : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} + : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} @@ -53,10 +53,6 @@ struct ExprValue { // If a value is relative to a section, it has a non-null Sec. SectionBase *sec; - // True if this expression is enclosed in ABSOLUTE(). - // This flag affects the return value of getValue(). - bool forceAbsolute; - uint64_t val; uint64_t alignment = 1; @@ -64,6 +60,10 @@ struct ExprValue { // resets type to STT_NOTYPE. uint8_t type = llvm::ELF::STT_NOTYPE; + // True if this expression is enclosed in ABSOLUTE(). + // This flag affects the return value of getValue(). + bool forceAbsolute; + // Original source location. Used for error messages. std::string loc; }; @@ -82,17 +82,18 @@ enum SectionsCommandKind { ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) }; -struct BaseCommand { - BaseCommand(int k) : kind(k) {} +struct SectionCommand { + SectionCommand(int k) : kind(k) {} int kind; }; // This represents ". = <expr>" or "<symbol> = <expr>". 
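The dot/outSecOff arithmetic that the patch inlines into assignOffsets() above can be illustrated with made-up numbers; alignTo below assumes power-of-two alignments, which is the common case:

  #include <cstdint>

  static uint64_t alignTo(uint64_t v, uint64_t align) {  // power-of-two align
    return (v + align - 1) & ~(align - 1);
  }

  // Hypothetical walk-through: dot = 0x1000, output section alignment 16,
  // then two input sections of size 6 (align 4) and size 8 (align 8):
  //   sec.addr        = alignTo(0x1000, 16)          = 0x1000
  //   isec1.outSecOff = alignTo(0x1000, 4) - 0x1000  = 0x0, dot -> 0x1006
  //   isec2.outSecOff = alignTo(0x1006, 8) - 0x1000  = 0x8, dot -> 0x1010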
-struct SymbolAssignment : BaseCommand { +struct SymbolAssignment : SectionCommand { SymbolAssignment(StringRef name, Expr e, std::string loc) - : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} + : SectionCommand(AssignmentKind), name(name), expression(e), + location(loc) {} - static bool classof(const BaseCommand *c) { + static bool classof(const SectionCommand *c) { return c->kind == AssignmentKind; } @@ -132,16 +133,32 @@ enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; // MEMORY command. struct MemoryRegion { MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, - uint32_t negFlags) + uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) : name(std::string(name)), origin(origin), length(length), flags(flags), - negFlags(negFlags) {} + invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} std::string name; Expr origin; Expr length; + // A section can be assigned to the region if any of these ELF section flags + // are set... uint32_t flags; + // ... or any of these flags are not set. + // For example, the memory region attribute "r" maps to SHF_WRITE. + uint32_t invFlags; + // A section cannot be assigned to the region if any of these ELF section + // flags are set... uint32_t negFlags; + // ... or any of these flags are not set. + // For example, the memory region attribute "!r" maps to SHF_WRITE. + uint32_t negInvFlags; uint64_t curPos = 0; + + bool compatibleWith(uint32_t secFlags) const { + if ((secFlags & negFlags) || (~secFlags & negInvFlags)) + return false; + return (secFlags & flags) || (~secFlags & invFlags); + } }; // This struct represents one section match pattern in SECTIONS() command. @@ -166,7 +183,7 @@ public: SortSectionPolicy sortInner; }; -class InputSectionDescription : public BaseCommand { +class InputSectionDescription : public SectionCommand { SingleStringMatcher filePat; // Cache of the most recent input argument and result of matchesFile(). @@ -175,10 +192,10 @@ class InputSectionDescription : public BaseCommand { public: InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, uint64_t withoutFlags = 0) - : BaseCommand(InputSectionKind), filePat(filePattern), + : SectionCommand(InputSectionKind), filePat(filePattern), withFlags(withFlags), withoutFlags(withoutFlags) {} - static bool classof(const BaseCommand *c) { + static bool classof(const SectionCommand *c) { return c->kind == InputSectionKind; } @@ -207,12 +224,12 @@ public: }; // Represents BYTE(), SHORT(), LONG(), or QUAD(). -struct ByteCommand : BaseCommand { +struct ByteCommand : SectionCommand { ByteCommand(Expr e, unsigned size, std::string commandString) - : BaseCommand(ByteKind), commandString(commandString), expression(e), + : SectionCommand(ByteKind), commandString(commandString), expression(e), size(size) {} - static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } + static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } // Keeps string representing the command. Used for -Map" is perhaps better. 
std::string commandString; @@ -275,10 +292,6 @@ class LinkerScript final { std::pair<MemoryRegion *, MemoryRegion *> findMemoryRegion(OutputSection *sec, MemoryRegion *hint); - void switchTo(OutputSection *sec); - uint64_t advance(uint64_t size, unsigned align); - void output(InputSection *sec); - void assignOffsets(OutputSection *sec); // Ctx captures the local AddressState and makes it accessible @@ -324,7 +337,7 @@ public: void processInsertCommands(); // SECTIONS command list. - std::vector<BaseCommand *> sectionCommands; + std::vector<SectionCommand *> sectionCommands; // PHDRS command list. std::vector<PhdrsCommand> phdrsCommands; diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index c4690ae5aefd..06735802f7f1 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -139,20 +139,7 @@ static void printEhFrame(raw_ostream &os, const EhFrameSection *sec) { } } -void elf::writeMapFile() { - if (config->mapFile.empty()) - return; - - llvm::TimeTraceScope timeScope("Write map file"); - - // Open a map file for writing. - std::error_code ec; - raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None); - if (ec) { - error("cannot open " + config->mapFile + ": " + ec.message()); - return; - } - +static void writeMapFile(raw_fd_ostream &os) { // Collect symbol info that we want to print out. std::vector<Defined *> syms = getSymbols(); SymbolMapTy sectionSyms = getSectionSyms(syms); @@ -164,30 +151,30 @@ void elf::writeMapFile() { << " Size Align Out In Symbol\n"; OutputSection* osec = nullptr; - for (BaseCommand *base : script->sectionCommands) { - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - if (cmd->provide && !cmd->sym) + for (SectionCommand *cmd : script->sectionCommands) { + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) { + if (assign->provide && !assign->sym) continue; - uint64_t lma = osec ? osec->getLMA() + cmd->addr - osec->getVA(0) : 0; - writeHeader(os, cmd->addr, lma, cmd->size, 1); - os << cmd->commandString << '\n'; + uint64_t lma = osec ? osec->getLMA() + assign->addr - osec->getVA(0) : 0; + writeHeader(os, assign->addr, lma, assign->size, 1); + os << assign->commandString << '\n'; continue; } - osec = cast<OutputSection>(base); + osec = cast<OutputSection>(cmd); writeHeader(os, osec->addr, osec->getLMA(), osec->size, osec->alignment); os << osec->name << '\n'; // Dump symbols for each input section. 
- for (BaseCommand *base : osec->sectionCommands) { - if (auto *isd = dyn_cast<InputSectionDescription>(base)) { + for (SectionCommand *subCmd : osec->commands) { + if (auto *isd = dyn_cast<InputSectionDescription>(subCmd)) { for (InputSection *isec : isd->sections) { if (auto *ehSec = dyn_cast<EhFrameSection>(isec)) { printEhFrame(os, ehSec); continue; } - writeHeader(os, isec->getVA(0), osec->getLMA() + isec->getOffset(0), + writeHeader(os, isec->getVA(), osec->getLMA() + isec->outSecOff, isec->getSize(), isec->alignment); os << indent8 << toString(isec) << '\n'; for (Symbol *sym : sectionSyms[isec]) @@ -196,19 +183,20 @@ void elf::writeMapFile() { continue; } - if (auto *cmd = dyn_cast<ByteCommand>(base)) { - writeHeader(os, osec->addr + cmd->offset, osec->getLMA() + cmd->offset, - cmd->size, 1); - os << indent8 << cmd->commandString << '\n'; + if (auto *data = dyn_cast<ByteCommand>(subCmd)) { + writeHeader(os, osec->addr + data->offset, + osec->getLMA() + data->offset, data->size, 1); + os << indent8 << data->commandString << '\n'; continue; } - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - if (cmd->provide && !cmd->sym) + if (auto *assign = dyn_cast<SymbolAssignment>(subCmd)) { + if (assign->provide && !assign->sym) continue; - writeHeader(os, cmd->addr, osec->getLMA() + cmd->addr - osec->getVA(0), - cmd->size, 1); - os << indent8 << cmd->commandString << '\n'; + writeHeader(os, assign->addr, + osec->getLMA() + assign->addr - osec->getVA(0), + assign->size, 1); + os << indent8 << assign->commandString << '\n'; continue; } } @@ -234,10 +222,6 @@ void elf::writeWhyExtract() { } } -static void print(StringRef a, StringRef b) { - lld::outs() << left_justify(a, 49) << " " << b << "\n"; -} - // Output a cross reference table to stdout. This is for --cref. // // For each global symbol, we print out a file that defines the symbol @@ -249,10 +233,7 @@ static void print(StringRef a, StringRef b) { // // In this case, strlen is defined by libc.so.6 and used by other two // files. -void elf::writeCrossReferenceTable() { - if (!config->cref) - return; - +static void writeCref(raw_fd_ostream &os) { // Collect symbols and files. MapVector<Symbol *, SetVector<InputFile *>> map; for (InputFile *file : objectFiles) { @@ -265,8 +246,12 @@ void elf::writeCrossReferenceTable() { } } - // Print out a header. - lld::outs() << "Cross Reference Table\n\n"; + auto print = [&](StringRef a, StringRef b) { + os << left_justify(a, 49) << ' ' << b << '\n'; + }; + + // Print a blank line and a header. The format matches GNU ld. + os << "\nCross Reference Table\n\n"; print("Symbol", "File"); // Print out a table. @@ -281,6 +266,27 @@ void elf::writeCrossReferenceTable() { } } +void elf::writeMapAndCref() { + if (config->mapFile.empty() && !config->cref) + return; + + llvm::TimeTraceScope timeScope("Write map file"); + + // Open a map file for writing. + std::error_code ec; + StringRef mapFile = config->mapFile.empty() ? 
"-" : config->mapFile; + raw_fd_ostream os(mapFile, ec, sys::fs::OF_None); + if (ec) { + error("cannot open " + mapFile + ": " + ec.message()); + return; + } + + if (!config->mapFile.empty()) + writeMapFile(os); + if (config->cref) + writeCref(os); +} + void elf::writeArchiveStats() { if (config->printArchiveStats.empty()) return; @@ -293,8 +299,8 @@ void elf::writeArchiveStats() { return; } - os << "members\tfetched\tarchive\n"; + os << "members\textracted\tarchive\n"; for (const ArchiveFile *f : archiveFiles) - os << f->getMemberCount() << '\t' << f->getFetchedMemberCount() << '\t' + os << f->getMemberCount() << '\t' << f->getExtractedMemberCount() << '\t' << f->getName() << '\n'; } diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h index 1b8c0168c0de..df548988c03b 100644 --- a/lld/ELF/MapFile.h +++ b/lld/ELF/MapFile.h @@ -11,9 +11,8 @@ namespace lld { namespace elf { -void writeMapFile(); +void writeMapAndCref(); void writeWhyExtract(); -void writeCrossReferenceTable(); void writeArchiveStats(); } // namespace elf } // namespace lld diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index ce82eb8d2754..f9f9f54a80d8 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -129,7 +129,8 @@ def color_diagnostics_eq: J<"color-diagnostics=">, HelpText<"Use colors in diagnostics (default: auto)">, MetaVarName<"[auto,always,never]">; -def cref: FF<"cref">, HelpText<"Output cross reference table">; +def cref: FF<"cref">, + HelpText<"Output cross reference table. If -Map is specified, print to the map file">; defm define_common: B<"define-common", "Assign space to common symbols", @@ -304,8 +305,8 @@ def no_undefined: F<"no-undefined">, def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">, HelpText<"Path to file to write output">; -def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">, - HelpText<"Specify the binary format for the output object file">; +defm oformat: EEq<"oformat", "Specify the binary format for the output object file">, + MetaVarName<"[elf,binary]">; def omagic: FF<"omagic">, MetaVarName<"<magic>">, HelpText<"Set the text and data sections to be readable and writable, do not page align sections, link against static libraries">; @@ -338,7 +339,7 @@ defm print_icf_sections: B<"print-icf-sections", def print_archive_stats: J<"print-archive-stats=">, HelpText<"Write archive usage statistics to the specified file. " - "Print the numbers of members and fetched members for each archive">; + "Print the numbers of members and extracted members for each archive">; defm print_symbol_order: Eq<"print-symbol-order", "Print a symbol order specified by --call-graph-ordering-file into the specified file">; @@ -454,22 +455,19 @@ def verbose: F<"verbose">, HelpText<"Verbose mode">; def version: F<"version">, HelpText<"Display the version number and exit">; -def power10_stubs: F<"power10-stubs">, HelpText<"Alias for --power10-stubs=auto">; - -def no_power10_stubs: F<"no-power10-stubs">, HelpText<"Alias for --power10-stubs=no">; - -def power10_stubs_eq: - J<"power10-stubs=">, HelpText< - "Enables Power10 instructions in all stubs without options, " - "options override previous flags." - "auto: Allow Power10 instructions in stubs if applicable." - "no: No Power10 instructions in stubs.">; +def power10_stubs_eq: JJ<"power10-stubs=">, MetaVarName<"<mode>">, + HelpText<"Whether to use Power10 instructions in call stubs for R_PPC64_REL24_NOTOC and TOC/NOTOC " + "interworking (yes (default): use; no: don't use). 
\"auto\" is currently the same as \"yes\"">; +def power10_stubs: FF<"power10-stubs">, Alias<power10_stubs_eq>, AliasArgs<["yes"]>, + HelpText<"Alias for --power10-stubs=auto">; +def no_power10_stubs: FF<"no-power10-stubs">, Alias<power10_stubs_eq>, AliasArgs<["no"]>, + HelpText<"Alias for --power10-stubs=no">; defm version_script: Eq<"version-script", "Read a version script">; defm warn_backrefs: BB<"warn-backrefs", - "Warn about backward symbol references to fetch archive members", - "Do not warn about backward symbol references to fetch archive members (default)">; + "Warn about backward symbol references to extract archive members", + "Do not warn about backward symbol references to extract archive members (default)">; defm warn_backrefs_exclude : EEq<"warn-backrefs-exclude", diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index cc4f0688701a..a17f713b742a 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -33,7 +33,6 @@ using namespace lld; using namespace lld::elf; uint8_t *Out::bufferStart; -uint8_t Out::first; PhdrEntry *Out::tlsPhdr; OutputSection *Out::elfHeader; OutputSection *Out::programHeaders; @@ -69,7 +68,7 @@ void OutputSection::writeHeaderTo(typename ELFT::Shdr *shdr) { } OutputSection::OutputSection(StringRef name, uint32_t type, uint64_t flags) - : BaseCommand(OutputSectionKind), + : SectionCommand(OutputSectionKind), SectionBase(Output, name, flags, /*Entsize*/ 0, /*Alignment*/ 1, type, /*Info*/ 0, /*Link*/ 0) {} @@ -100,10 +99,9 @@ static bool canMergeToProgbits(unsigned type) { void OutputSection::recordSection(InputSectionBase *isec) { partition = isec->partition; isec->parent = this; - if (sectionCommands.empty() || - !isa<InputSectionDescription>(sectionCommands.back())) - sectionCommands.push_back(make<InputSectionDescription>("")); - auto *isd = cast<InputSectionDescription>(sectionCommands.back()); + if (commands.empty() || !isa<InputSectionDescription>(commands.back())) + commands.push_back(make<InputSectionDescription>("")); + auto *isd = cast<InputSectionDescription>(commands.back()); isd->sectionBases.push_back(isec); } @@ -166,15 +164,15 @@ void OutputSection::commitSection(InputSection *isec) { // to compute an output offset for each piece of each input section. void OutputSection::finalizeInputSections() { std::vector<MergeSyntheticSection *> mergeSections; - for (BaseCommand *base : sectionCommands) { - auto *cmd = dyn_cast<InputSectionDescription>(base); - if (!cmd) + for (SectionCommand *cmd : commands) { + auto *isd = dyn_cast<InputSectionDescription>(cmd); + if (!isd) continue; - cmd->sections.reserve(cmd->sectionBases.size()); - for (InputSectionBase *s : cmd->sectionBases) { + isd->sections.reserve(isd->sectionBases.size()); + for (InputSectionBase *s : isd->sectionBases) { MergeInputSection *ms = dyn_cast<MergeInputSection>(s); if (!ms) { - cmd->sections.push_back(cast<InputSection>(s)); + isd->sections.push_back(cast<InputSection>(s)); continue; } @@ -203,17 +201,17 @@ void OutputSection::finalizeInputSections() { mergeSections.push_back(syn); i = std::prev(mergeSections.end()); syn->entsize = ms->entsize; - cmd->sections.push_back(syn); + isd->sections.push_back(syn); } (*i)->addSection(ms); } // sectionBases should not be used from this point onwards. Clear it to // catch misuses. - cmd->sectionBases.clear(); + isd->sectionBases.clear(); // Some input sections may be removed from the list after ICF. 
- for (InputSection *s : cmd->sections) + for (InputSection *s : isd->sections) commitSection(s); } for (auto *ms : mergeSections) @@ -237,13 +235,13 @@ uint64_t elf::getHeaderSize() { return Out::elfHeader->size + Out::programHeaders->size; } -bool OutputSection::classof(const BaseCommand *c) { +bool OutputSection::classof(const SectionCommand *c) { return c->kind == OutputSectionKind; } void OutputSection::sort(llvm::function_ref<int(InputSectionBase *s)> order) { assert(isLive()); - for (BaseCommand *b : sectionCommands) + for (SectionCommand *b : commands) if (auto *isd = dyn_cast<InputSectionDescription>(b)) sortByOrder(isd->sections, order); } @@ -367,8 +365,8 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { // Linker scripts may have BYTE()-family commands with which you // can write arbitrary bytes to the output. Process them if any. - for (BaseCommand *base : sectionCommands) - if (auto *data = dyn_cast<ByteCommand>(base)) + for (SectionCommand *cmd : commands) + if (auto *data = dyn_cast<ByteCommand>(cmd)) writeInt(buf + data->offset, data->expression().getValue(), data->size); } @@ -485,8 +483,8 @@ static bool compCtors(const InputSection *a, const InputSection *b) { // Unfortunately, the rules are different from the one for .{init,fini}_array. // Read the comment above. void OutputSection::sortCtorsDtors() { - assert(sectionCommands.size() == 1); - auto *isd = cast<InputSectionDescription>(sectionCommands[0]); + assert(commands.size() == 1); + auto *isd = cast<InputSectionDescription>(commands[0]); llvm::stable_sort(isd->sections, compCtors); } @@ -505,8 +503,8 @@ int elf::getPriority(StringRef s) { } InputSection *elf::getFirstInputSection(const OutputSection *os) { - for (BaseCommand *base : os->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(base)) + for (SectionCommand *cmd : os->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) if (!isd->sections.empty()) return isd->sections[0]; return nullptr; @@ -514,8 +512,8 @@ InputSection *elf::getFirstInputSection(const OutputSection *os) { std::vector<InputSection *> elf::getInputSections(const OutputSection *os) { std::vector<InputSection *> ret; - for (BaseCommand *base : os->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(base)) + for (SectionCommand *cmd : os->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) ret.insert(ret.end(), isd->sections.begin(), isd->sections.end()); return ret; } diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index a0f806614387..a5b05cf28aa8 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -29,7 +29,7 @@ class InputSectionBase; // It is composed of multiple InputSections. // The writer creates multiple OutputSections and assign them unique, // non-overlapping file offsets and VAs. -class OutputSection final : public BaseCommand, public SectionBase { +class OutputSection final : public SectionCommand, public SectionBase { public: OutputSection(StringRef name, uint32_t type, uint64_t flags); @@ -37,7 +37,7 @@ public: return s->kind() == SectionBase::Output; } - static bool classof(const BaseCommand *c); + static bool classof(const SectionCommand *c); uint64_t getLMA() const { return ptLoad ? 
addr + ptLoad->lmaOffset : addr; } template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *sHdr); @@ -82,7 +82,7 @@ public: Expr alignExpr; Expr lmaExpr; Expr subalignExpr; - std::vector<BaseCommand *> sectionCommands; + std::vector<SectionCommand *> commands; std::vector<StringRef> phdrs; llvm::Optional<std::array<uint8_t, 4>> filler; ConstraintKind constraint = ConstraintKind::NoConstraint; @@ -128,7 +128,6 @@ std::vector<InputSection *> getInputSections(const OutputSection *os); // until Writer is initialized. struct Out { static uint8_t *bufferStart; - static uint8_t first; static PhdrEntry *tlsPhdr; static OutputSection *elfHeader; static OutputSection *programHeaders; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 9c22ce7d6013..5136ba2151a3 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -66,10 +66,10 @@ using namespace lld; using namespace lld::elf; static Optional<std::string> getLinkerScriptLocation(const Symbol &sym) { - for (BaseCommand *base : script->sectionCommands) - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) - if (cmd->sym == &sym) - return cmd->location; + for (SectionCommand *cmd : script->sectionCommands) + if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) + if (assign->sym == &sym) + return assign->location; return None; } @@ -366,10 +366,10 @@ template <class ELFT> static void addCopyRelSymbol(SharedSymbol &ss) { // At this point, sectionBases has been migrated to sections. Append sec to // sections. - if (osec->sectionCommands.empty() || - !isa<InputSectionDescription>(osec->sectionCommands.back())) - osec->sectionCommands.push_back(make<InputSectionDescription>("")); - auto *isd = cast<InputSectionDescription>(osec->sectionCommands.back()); + if (osec->commands.empty() || + !isa<InputSectionDescription>(osec->commands.back())) + osec->commands.push_back(make<InputSectionDescription>("")); + auto *isd = cast<InputSectionDescription>(osec->commands.back()); isd->sections.push_back(sec); osec->commitSection(sec); @@ -1358,32 +1358,6 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, } } - // Relax relocations. - // - // If we know that a PLT entry will be resolved within the same ELF module, we - // can skip PLT access and directly jump to the destination function. For - // example, if we are linking a main executable, all dynamic symbols that can - // be resolved within the executable will actually be resolved that way at - // runtime, because the main executable is always at the beginning of a search - // list. We can leverage that fact. - if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) { - if (expr != R_GOT_PC) { - // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call - // stub type. It should be ignored if optimized to R_PC. - if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) - addend &= ~0x8000; - // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into - // call __tls_get_addr even if the symbol is non-preemptible. - if (!(config->emachine == EM_HEXAGON && - (type == R_HEX_GD_PLT_B22_PCREL || - type == R_HEX_GD_PLT_B22_PCREL_X || - type == R_HEX_GD_PLT_B32_PCREL_X))) - expr = fromPlt(expr); - } else if (!isAbsoluteValue(sym)) { - expr = target->adjustGotPcExpr(type, addend, relocatedAddr); - } - } - // If the relocation does not emit a GOT or GOTPLT entry but its computation // uses their addresses, we need GOT or GOTPLT to be created. 
// @@ -1411,6 +1385,32 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, return; } + // Relax relocations. + // + // If we know that a PLT entry will be resolved within the same ELF module, we + // can skip PLT access and directly jump to the destination function. For + // example, if we are linking a main executable, all dynamic symbols that can + // be resolved within the executable will actually be resolved that way at + // runtime, because the main executable is always at the beginning of a search + // list. We can leverage that fact. + if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) { + if (expr != R_GOT_PC) { + // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call + // stub type. It should be ignored if optimized to R_PC. + if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL) + addend &= ~0x8000; + // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into + // call __tls_get_addr even if the symbol is non-preemptible. + if (!(config->emachine == EM_HEXAGON && + (type == R_HEX_GD_PLT_B22_PCREL || + type == R_HEX_GD_PLT_B22_PCREL_X || + type == R_HEX_GD_PLT_B32_PCREL_X))) + expr = fromPlt(expr); + } else if (!isAbsoluteValue(sym)) { + expr = target->adjustGotPcExpr(type, addend, relocatedAddr); + } + } + // We were asked not to generate PLT entries for ifuncs. Instead, pass the // direct relocation on through. if (sym.isGnuIFunc() && config->zIfuncNoplt) { @@ -1640,7 +1640,7 @@ static void forEachInputSectionDescription( for (OutputSection *os : outputSections) { if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) continue; - for (BaseCommand *bc : os->sectionCommands) + for (SectionCommand *bc : os->commands) if (auto *isd = dyn_cast<InputSectionDescription>(bc)) fn(os, isd); } @@ -1817,7 +1817,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) { // Find InputSectionRange within Target Output Section (TOS) that the // InputSection (IS) that we need to precede is in. OutputSection *tos = isec->getParent(); - for (BaseCommand *bc : tos->sectionCommands) { + for (SectionCommand *bc : tos->commands) { auto *isd = dyn_cast<InputSectionDescription>(bc); if (!isd || isd->sections.empty()) continue; diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index ad3b3e61ad59..d3b0296acab0 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -93,7 +93,7 @@ private: void readSectionAddressType(OutputSection *cmd); OutputSection *readOverlaySectionDescription(); OutputSection *readOutputSectionDescription(StringRef outSec); - std::vector<BaseCommand *> readOverlay(); + std::vector<SectionCommand *> readOverlay(); std::vector<StringRef> readOutputSectionPhdrs(); std::pair<uint64_t, uint64_t> readInputSectionFlags(); InputSectionDescription *readInputSectionDescription(StringRef tok); @@ -113,7 +113,8 @@ private: Expr getPageSize(); Expr readMemoryAssignment(StringRef, StringRef, StringRef); - std::pair<uint32_t, uint32_t> readMemoryAttributes(); + void readMemoryAttributes(uint32_t &flags, uint32_t &invFlags, + uint32_t &negFlags, uint32_t &negInvFlags); Expr combine(StringRef op, Expr l, Expr r); Expr readExpr(); @@ -518,7 +519,7 @@ void ScriptParser::readSearchDir() { // sections that use the same virtual memory range and normally would trigger // linker's sections sanity check failures. 
// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description -std::vector<BaseCommand *> ScriptParser::readOverlay() { +std::vector<SectionCommand *> ScriptParser::readOverlay() { // VA and LMA expressions are optional, though for simplicity of // implementation we assume they are not. That is what OVERLAY was designed // for first of all: to allow sections with overlapping VAs at different LMAs. @@ -528,7 +529,7 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() { Expr lmaExpr = readParenExpr(); expect("{"); - std::vector<BaseCommand *> v; + std::vector<SectionCommand *> v; OutputSection *prev = nullptr; while (!errorCount() && !consume("}")) { // VA is the same for all sections. The LMAs are consecutive in memory @@ -549,7 +550,7 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() { // Here we want to create the Dot assignment command to achieve that. Expr moveDot = [=] { uint64_t max = 0; - for (BaseCommand *cmd : v) + for (SectionCommand *cmd : v) max = std::max(max, cast<OutputSection>(cmd)->size); return addrExpr().getValue() + max; }; @@ -565,11 +566,11 @@ void ScriptParser::readOverwriteSections() { void ScriptParser::readSections() { expect("{"); - std::vector<BaseCommand *> v; + std::vector<SectionCommand *> v; while (!errorCount() && !consume("}")) { StringRef tok = next(); if (tok == "OVERLAY") { - for (BaseCommand *cmd : readOverlay()) + for (SectionCommand *cmd : readOverlay()) v.push_back(cmd); continue; } else if (tok == "INCLUDE") { @@ -577,7 +578,7 @@ void ScriptParser::readSections() { continue; } - if (BaseCommand *cmd = readAssignment(tok)) + if (SectionCommand *cmd = readAssignment(tok)) v.push_back(cmd); else v.push_back(readOutputSectionDescription(tok)); @@ -597,7 +598,7 @@ void ScriptParser::readSections() { setError("expected AFTER/BEFORE, but got '" + next() + "'"); StringRef where = next(); std::vector<StringRef> names; - for (BaseCommand *cmd : v) + for (SectionCommand *cmd : v) if (auto *os = dyn_cast<OutputSection>(cmd)) names.push_back(os->name); if (!names.empty()) @@ -848,7 +849,7 @@ OutputSection *ScriptParser::readOverlaySectionDescription() { uint64_t withoutFlags = 0; if (consume("INPUT_SECTION_FLAGS")) std::tie(withFlags, withoutFlags) = readInputSectionFlags(); - cmd->sectionCommands.push_back( + cmd->commands.push_back( readInputSectionRules(next(), withFlags, withoutFlags)); } return cmd; @@ -884,9 +885,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { if (tok == ";") { // Empty commands are allowed. Do nothing here. } else if (SymbolAssignment *assign = readAssignment(tok)) { - cmd->sectionCommands.push_back(assign); + cmd->commands.push_back(assign); } else if (ByteCommand *data = readByteCommand(tok)) { - cmd->sectionCommands.push_back(data); + cmd->commands.push_back(data); } else if (tok == "CONSTRUCTORS") { // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors // by name. This is for very old file formats such as ECOFF/XCOFF. @@ -903,7 +904,7 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { } else if (tok == "INCLUDE") { readInclude(); } else if (peek() == "(") { - cmd->sectionCommands.push_back(readInputSectionDescription(tok)); + cmd->commands.push_back(readInputSectionDescription(tok)); } else { // We have a file name and no input sections description. It is not a // commonly used syntax, but still acceptable. 
In that case, all sections @@ -913,7 +914,7 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) { // case above. auto *isd = make<InputSectionDescription>(tok); isd->sectionPatterns.push_back({{}, StringMatcher("*")}); - cmd->sectionCommands.push_back(isd); + cmd->commands.push_back(isd); } } @@ -1614,9 +1615,11 @@ void ScriptParser::readMemory() { } uint32_t flags = 0; + uint32_t invFlags = 0; uint32_t negFlags = 0; + uint32_t negInvFlags = 0; if (consume("(")) { - std::tie(flags, negFlags) = readMemoryAttributes(); + readMemoryAttributes(flags, invFlags, negFlags, negInvFlags); expect(")"); } expect(":"); @@ -1626,7 +1629,8 @@ void ScriptParser::readMemory() { Expr length = readMemoryAssignment("LENGTH", "len", "l"); // Add the memory region to the region map. - MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, negFlags); + MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, invFlags, + negFlags, negInvFlags); if (!script->memoryRegions.insert({tok, mr}).second) setError("region '" + tok + "' already defined"); } @@ -1635,30 +1639,34 @@ void ScriptParser::readMemory() { // This function parses the attributes used to match against section // flags when placing output sections in a memory region. These flags // are only used when an explicit memory region name is not used. -std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { - uint32_t flags = 0; - uint32_t negFlags = 0; +void ScriptParser::readMemoryAttributes(uint32_t &flags, uint32_t &invFlags, + uint32_t &negFlags, + uint32_t &negInvFlags) { bool invert = false; for (char c : next().lower()) { - uint32_t flag = 0; - if (c == '!') + if (c == '!') { invert = !invert; - else if (c == 'w') - flag = SHF_WRITE; + std::swap(flags, negFlags); + std::swap(invFlags, negInvFlags); + continue; + } + if (c == 'w') + flags |= SHF_WRITE; else if (c == 'x') - flag = SHF_EXECINSTR; + flags |= SHF_EXECINSTR; else if (c == 'a') - flag = SHF_ALLOC; - else if (c != 'r') + flags |= SHF_ALLOC; + else if (c == 'r') + invFlags |= SHF_WRITE; + else setError("invalid memory region attribute"); + } - if (invert) - negFlags |= flag; - else - flags |= flag; + if (invert) { + std::swap(flags, negFlags); + std::swap(invFlags, negInvFlags); } - return {flags, negFlags}; } void elf::readLinkerScript(MemoryBufferRef mb) { diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index c309957ee5ba..e615fb70a40f 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -113,7 +113,7 @@ Symbol *SymbolTable::find(StringRef name) { // A version script/dynamic list is only meaningful for a Defined symbol. // A CommonSymbol will be converted to a Defined in replaceCommonSymbols(). -// A lazy symbol may be made Defined if an LTO libcall fetches it. +// A lazy symbol may be made Defined if an LTO libcall extracts it. 
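Returning to the MEMORY attribute change in ScriptParser.cpp above: the single flags/negFlags pair becomes four words, and region matching is centralized in MemoryRegion::compatibleWith(). A self-contained sketch of that predicate, with the flag values written out locally and an assumed attribute string "(rx!w)":

  #include <cstdint>

  constexpr uint32_t SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4;

  // For a region declared as (rx!w) the parser above would roughly produce:
  //   flags       = SHF_EXECINSTR   // 'x': accept executable sections
  //   invFlags    = SHF_WRITE       // 'r': accept sections without SHF_WRITE
  //   negFlags    = SHF_WRITE       // '!w': reject writable sections
  //   negInvFlags = 0
  bool compatibleWith(uint32_t secFlags, uint32_t flags, uint32_t invFlags,
                      uint32_t negFlags, uint32_t negInvFlags) {
    if ((secFlags & negFlags) || (~secFlags & negInvFlags))
      return false;
    return (secFlags & flags) || (~secFlags & invFlags);
  }

  // Under these values, .text (SHF_ALLOC|SHF_EXECINSTR) is accepted, while
  // .data (SHF_ALLOC|SHF_WRITE) is rejected by the '!w' term.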
static bool canBeVersioned(const Symbol &sym) { return sym.isDefined() || sym.isCommon() || sym.isLazy(); } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 5f95a1b3c7ac..8c410b4d5bfb 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -256,18 +256,11 @@ void Symbol::parseSymbolVersion() { verstr); } -void Symbol::fetch() const { - if (auto *sym = dyn_cast<LazyArchive>(this)) { - cast<ArchiveFile>(sym->file)->fetch(sym->sym); - return; - } - - if (auto *sym = dyn_cast<LazyObject>(this)) { - dyn_cast<LazyObjFile>(sym->file)->fetch(); - return; - } - - llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol"); +void Symbol::extract() const { + if (auto *sym = dyn_cast<LazyArchive>(this)) + cast<ArchiveFile>(sym->file)->extract(sym->sym); + else + cast<LazyObjFile>(this->file)->extract(); } MemoryBufferRef LazyArchive::getMemberBuffer() { @@ -478,8 +471,8 @@ void Symbol::resolveUndefined(const Undefined &other) { printTraceSymbol(&other); if (isLazy()) { - // An undefined weak will not fetch archive members. See comment on Lazy in - // Symbols.h for the details. + // An undefined weak will not extract archive members. See comment on Lazy + // in Symbols.h for the details. if (other.binding == STB_WEAK) { binding = STB_WEAK; type = other.type; @@ -489,9 +482,9 @@ void Symbol::resolveUndefined(const Undefined &other) { // Do extra check for --warn-backrefs. // // --warn-backrefs is an option to prevent an undefined reference from - // fetching an archive member written earlier in the command line. It can be - // used to keep compatibility with GNU linkers to some degree. - // I'll explain the feature and why you may find it useful in this comment. + // extracting an archive member written earlier in the command line. It can + // be used to keep compatibility with GNU linkers to some degree. I'll + // explain the feature and why you may find it useful in this comment. // // lld's symbol resolution semantics is more relaxed than traditional Unix // linkers. For example, @@ -538,7 +531,7 @@ void Symbol::resolveUndefined(const Undefined &other) { // group assignment rule simulates the traditional linker's semantics. bool backref = config->warnBackrefs && other.file && file->groupId < other.file->groupId; - fetch(); + extract(); if (!config->whyExtract.empty()) recordWhyExtract(other.file, *file, *this); @@ -712,23 +705,23 @@ template <class LazyT> static void replaceCommon(Symbol &oldSym, const LazyT &newSym) { backwardReferences.erase(&oldSym); oldSym.replace(newSym); - newSym.fetch(); + newSym.extract(); } template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { // For common objects, we want to look for global or weak definitions that - // should be fetched as the canonical definition instead. + // should be extracted as the canonical definition instead. 
if (isCommon() && elf::config->fortranCommon) { if (auto *laSym = dyn_cast<LazyArchive>(&other)) { ArchiveFile *archive = cast<ArchiveFile>(laSym->file); const Archive::Symbol &archiveSym = laSym->sym; - if (archive->shouldFetchForCommon(archiveSym)) { + if (archive->shouldExtractForCommon(archiveSym)) { replaceCommon(*this, other); return; } } else if (auto *loSym = dyn_cast<LazyObject>(&other)) { LazyObjFile *obj = cast<LazyObjFile>(loSym->file); - if (obj->shouldFetchForCommon(loSym->getName())) { + if (obj->shouldExtractForCommon(loSym->getName())) { replaceCommon(*this, other); return; } @@ -742,7 +735,7 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { return; } - // An undefined weak will not fetch archive members. See comment on Lazy in + // An undefined weak will not extract archive members. See comment on Lazy in // Symbols.h for the details. if (isWeak()) { uint8_t ty = type; @@ -753,7 +746,7 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { } const InputFile *oldFile = file; - other.fetch(); + other.extract(); if (!config->whyExtract.empty()) recordWhyExtract(oldFile, *file, *this); } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index 816d61563021..cc48ef0ab3b7 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -93,7 +93,7 @@ public: // Symbol binding. This is not overwritten by replace() to track // changes during resolution. In particular: // - An undefined weak is still weak when it resolves to a shared library. - // - An undefined weak will not fetch archive members, but we have to + // - An undefined weak will not extract archive members, but we have to // remember it is weak. uint8_t binding; @@ -216,10 +216,10 @@ public: void mergeProperties(const Symbol &other); void resolve(const Symbol &other); - // If this is a lazy symbol, fetch an input file and add the symbol + // If this is a lazy symbol, extract an input file and add the symbol // in the file to the symbol table. Calling this function on // non-lazy object causes a runtime error. 
- void fetch() const; + void extract() const; static bool isExportDynamic(Kind k, uint8_t visibility) { if (k == SharedKind) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index f1594eb8df86..4078f7e01674 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -900,7 +900,7 @@ void MipsGotSection::build() { got.pagesMap) { const OutputSection *os = p.first; uint64_t secSize = 0; - for (BaseCommand *cmd : os->sectionCommands) { + for (SectionCommand *cmd : os->commands) { if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) for (InputSection *isec : isd->sections) { uint64_t off = alignTo(secSize, isec->alignment); @@ -1258,43 +1258,6 @@ DynamicSection<ELFT>::DynamicSection() this->flags = SHF_ALLOC; } -template <class ELFT> -void DynamicSection<ELFT>::add(int32_t tag, std::function<uint64_t()> fn) { - entries.push_back({tag, fn}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addInt(int32_t tag, uint64_t val) { - entries.push_back({tag, [=] { return val; }}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addInSec(int32_t tag, InputSection *sec) { - entries.push_back({tag, [=] { return sec->getVA(0); }}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addInSecRelative(int32_t tag, InputSection *sec) { - size_t tagOffset = entries.size() * entsize; - entries.push_back( - {tag, [=] { return sec->getVA(0) - (getVA() + tagOffset); }}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addOutSec(int32_t tag, OutputSection *sec) { - entries.push_back({tag, [=] { return sec->addr; }}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addSize(int32_t tag, OutputSection *sec) { - entries.push_back({tag, [=] { return sec->size; }}); -} - -template <class ELFT> -void DynamicSection<ELFT>::addSym(int32_t tag, Symbol *sym) { - entries.push_back({tag, [=] { return sym->getVA(); }}); -} - // The output section .rela.dyn may include these synthetic sections: // // - part.relaDyn @@ -1303,15 +1266,13 @@ void DynamicSection<ELFT>::addSym(int32_t tag, Symbol *sym) { // .rela.dyn // // DT_RELASZ is the total size of the included sections. -static std::function<uint64_t()> addRelaSz(RelocationBaseSection *relaDyn) { - return [=]() { - size_t size = relaDyn->getSize(); - if (in.relaIplt->getParent() == relaDyn->getParent()) - size += in.relaIplt->getSize(); - if (in.relaPlt->getParent() == relaDyn->getParent()) - size += in.relaPlt->getSize(); - return size; - }; +static uint64_t addRelaSz(RelocationBaseSection *relaDyn) { + size_t size = relaDyn->getSize(); + if (in.relaIplt->getParent() == relaDyn->getParent()) + size += in.relaIplt->getSize(); + if (in.relaPlt->getParent() == relaDyn->getParent()) + size += in.relaPlt->getSize(); + return size; } // A Linker script may assign the RELA relocation sections to the same @@ -1327,9 +1288,19 @@ static uint64_t addPltRelSz() { } // Add remaining entries to complete .dynamic contents. 
-template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { +template <class ELFT> +std::vector<std::pair<int32_t, uint64_t>> +DynamicSection<ELFT>::computeContents() { elf::Partition &part = getPartition(); bool isMain = part.name.empty(); + std::vector<std::pair<int32_t, uint64_t>> entries; + + auto addInt = [&](int32_t tag, uint64_t val) { + entries.emplace_back(tag, val); + }; + auto addInSec = [&](int32_t tag, const InputSection *sec) { + entries.emplace_back(tag, sec->getVA()); + }; for (StringRef s : config->filterList) addInt(DT_FILTER, part.dynStrTab->addString(s)); @@ -1382,7 +1353,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { } if (!config->zText) dtFlags |= DF_TEXTREL; - if (config->hasStaticTlsModel) + if (config->hasTlsIe && config->shared) dtFlags |= DF_STATIC_TLS; if (dtFlags) @@ -1401,14 +1372,11 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (!config->shared && !config->relocatable && !config->zRodynamic) addInt(DT_DEBUG, 0); - if (OutputSection *sec = part.dynStrTab->getParent()) - this->link = sec->sectionIndex; - if (part.relaDyn->isNeeded() || (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { addInSec(part.relaDyn->dynamicTag, part.relaDyn); - entries.push_back({part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)}); + entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); bool isRela = config->isRela; addInt(isRela ? DT_RELAENT : DT_RELENT, @@ -1426,8 +1394,8 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (part.relrDyn && !part.relrDyn->relocs.empty()) { addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR, part.relrDyn); - addSize(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ, - part.relrDyn->getParent()); + addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ, + part.relrDyn->getParent()->size); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, sizeof(Elf_Relr)); } @@ -1439,7 +1407,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { // .rel[a].plt section. 
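Regarding the DF_STATIC_TLS change earlier in this hunk (the flag is now set when an initial-exec TLS access was seen while producing a shared object, per config->hasTlsIe && config->shared): as a hedged illustration, source like the following, built with -fPIC into a DSO, is the kind of input that trips it. The variable name is made up.

// tls.cpp -- compiled with -fPIC and linked into a shared object. The
// initial-exec model pins the variable into the static TLS block, so the
// linker records DF_STATIC_TLS to tell the loader this DSO needs static TLS
// space (which constrains how it can be dlopen'd).
__attribute__((tls_model("initial-exec"))) thread_local int counter = 0;

int bump() { return ++counter; }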
if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { addInSec(DT_JMPREL, in.relaPlt); - entries.push_back({DT_PLTRELSZ, addPltRelSz}); + entries.emplace_back(DT_PLTRELSZ, addPltRelSz()); switch (config->emachine) { case EM_MIPS: addInSec(DT_MIPS_PLTGOT, in.gotPlt); @@ -1481,24 +1449,24 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { if (isMain) { if (Out::preinitArray) { - addOutSec(DT_PREINIT_ARRAY, Out::preinitArray); - addSize(DT_PREINIT_ARRAYSZ, Out::preinitArray); + addInt(DT_PREINIT_ARRAY, Out::preinitArray->addr); + addInt(DT_PREINIT_ARRAYSZ, Out::preinitArray->size); } if (Out::initArray) { - addOutSec(DT_INIT_ARRAY, Out::initArray); - addSize(DT_INIT_ARRAYSZ, Out::initArray); + addInt(DT_INIT_ARRAY, Out::initArray->addr); + addInt(DT_INIT_ARRAYSZ, Out::initArray->size); } if (Out::finiArray) { - addOutSec(DT_FINI_ARRAY, Out::finiArray); - addSize(DT_FINI_ARRAYSZ, Out::finiArray); + addInt(DT_FINI_ARRAY, Out::finiArray->addr); + addInt(DT_FINI_ARRAYSZ, Out::finiArray->size); } if (Symbol *b = symtab->find(config->init)) if (b->isDefined()) - addSym(DT_INIT, b); + addInt(DT_INIT, b->getVA()); if (Symbol *b = symtab->find(config->fini)) if (b->isDefined()) - addSym(DT_FINI, b); + addInt(DT_FINI, b->getVA()); } if (part.verSym && part.verSym->isNeeded()) @@ -1521,8 +1489,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { addInt(DT_MIPS_FLAGS, RHF_NOTPOT); addInt(DT_MIPS_BASE_ADDRESS, target->getImageBase()); addInt(DT_MIPS_SYMTABNO, part.dynSymTab->getNumSymbols()); - - add(DT_MIPS_LOCAL_GOTNO, [] { return in.mipsGot->getLocalEntriesNum(); }); + addInt(DT_MIPS_LOCAL_GOTNO, in.mipsGot->getLocalEntriesNum()); if (const Symbol *b = in.mipsGot->getFirstGlobalEntry()) addInt(DT_MIPS_GOTSYM, b->dynsymIndex); @@ -1534,37 +1501,39 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { addInSec(DT_MIPS_RLD_MAP, in.mipsRldMap); // Store the offset to the .rld_map section // relative to the address of the tag. - addInSecRelative(DT_MIPS_RLD_MAP_REL, in.mipsRldMap); + addInt(DT_MIPS_RLD_MAP_REL, + in.mipsRldMap->getVA() - (getVA() + entries.size() * entsize)); } } // DT_PPC_GOT indicates to glibc Secure PLT is used. If DT_PPC_GOT is absent, // glibc assumes the old-style BSS PLT layout which we don't support. if (config->emachine == EM_PPC) - add(DT_PPC_GOT, [] { return in.got->getVA(); }); + addInSec(DT_PPC_GOT, in.got); // Glink dynamic tag is required by the V2 abi if the plt section isn't empty. if (config->emachine == EM_PPC64 && in.plt->isNeeded()) { // The Glink tag points to 32 bytes before the first lazy symbol resolution // stub, which starts directly after the header. 
- entries.push_back({DT_PPC64_GLINK, [=] { - unsigned offset = target->pltHeaderSize - 32; - return in.plt->getVA(0) + offset; - }}); + addInt(DT_PPC64_GLINK, in.plt->getVA() + target->pltHeaderSize - 32); } addInt(DT_NULL, 0); + return entries; +} - getParent()->link = this->link; - this->size = entries.size() * this->entsize; +template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { + if (OutputSection *sec = getPartition().dynStrTab->getParent()) + getParent()->link = sec->sectionIndex; + this->size = computeContents().size() * this->entsize; } template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *buf) { auto *p = reinterpret_cast<Elf_Dyn *>(buf); - for (std::pair<int32_t, std::function<uint64_t()>> &kv : entries) { + for (std::pair<int32_t, uint64_t> kv : computeContents()) { p->d_tag = kv.first; - p->d_un.d_val = kv.second(); + p->d_un.d_val = kv.second; ++p; } } @@ -2331,8 +2300,8 @@ bool SymtabShndxSection::isNeeded() const { // late, and we do not know them here. For simplicity, we just always create // a .symtab_shndx section when the amount of output sections is huge. size_t size = 0; - for (BaseCommand *base : script->sectionCommands) - if (isa<OutputSection>(base)) + for (SectionCommand *cmd : script->sectionCommands) + if (isa<OutputSection>(cmd)) ++size; return size >= SHN_LORESERVE; } @@ -2411,21 +2380,8 @@ void GnuHashTableSection::writeTo(uint8_t *buf) { write32(buf + 12, Shift2); buf += 16; - // Write a bloom filter and a hash table. - writeBloomFilter(buf); - buf += config->wordsize * maskWords; - writeHashTable(buf); -} - -// This function writes a 2-bit bloom filter. This bloom filter alone -// usually filters out 80% or more of all symbol lookups [1]. -// The dynamic linker uses the hash table only when a symbol is not -// filtered out by a bloom filter. -// -// [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2), -// p.9, https://www.akkadia.org/drepper/dsohowto.pdf -void GnuHashTableSection::writeBloomFilter(uint8_t *buf) { - unsigned c = config->is64 ? 64 : 32; + // Write the 2-bit bloom filter. + const unsigned c = config->is64 ? 64 : 32; for (const Entry &sym : symbols) { // When C = 64, we choose a word with bits [6:...] and set 1 to two bits in // the word using bits [0:5] and [26:31]. @@ -2435,9 +2391,9 @@ void GnuHashTableSection::writeBloomFilter(uint8_t *buf) { val |= uint64_t(1) << ((sym.hash >> Shift2) % c); writeUint(buf + i * config->wordsize, val); } -} + buf += config->wordsize * maskWords; -void GnuHashTableSection::writeHashTable(uint8_t *buf) { + // Write the hash table. uint32_t *buckets = reinterpret_cast<uint32_t *>(buf); uint32_t oldBucket = -1; uint32_t *values = buckets + nBuckets; @@ -3160,7 +3116,7 @@ size_t VersionTableSection::getSize() const { void VersionTableSection::writeTo(uint8_t *buf) { buf += 2; for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) { - // For an unfetched lazy symbol (undefined weak), it must have been + // For an unextracted lazy symbol (undefined weak), it must have been // converted to Undefined and have VER_NDX_GLOBAL version here. assert(!s.sym->isLazy()); write16(buf, s.sym->versionId); @@ -3648,8 +3604,8 @@ PPC32Got2Section::PPC32Got2Section() bool PPC32Got2Section::isNeeded() const { // See the comment below. This is not needed if there is no other // InputSection. 
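For context on the 2-bit bloom filter now written inline above: the loader-side probe it enables looks roughly like the sketch below (not lld code; wordBits and shift2 mirror the c and Shift2 used above). If either probed bit is clear, the symbol is definitely not exported by this object and the hash buckets are never consulted.

#include <cstddef>
#include <cstdint>

static bool mayBeExported(const uint64_t *bloom, size_t maskWords,
                          uint32_t gnuHash, unsigned wordBits,
                          unsigned shift2) {
  uint64_t word = bloom[(gnuHash / wordBits) % maskWords];
  uint64_t mask = (uint64_t(1) << (gnuHash % wordBits)) |
                  (uint64_t(1) << ((gnuHash >> shift2) % wordBits));
  return (word & mask) == mask; // both bits set: fall through to the buckets
}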
- for (BaseCommand *base : getParent()->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(base)) + for (SectionCommand *cmd : getParent()->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) for (InputSection *isec : isd->sections) if (isec != this) return true; @@ -3662,8 +3618,8 @@ void PPC32Got2Section::finalizeContents() { // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is // to collect input sections named ".got2". uint32_t offset = 0; - for (BaseCommand *base : getParent()->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(base)) { + for (SectionCommand *cmd : getParent()->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) { for (InputSection *isec : isd->sections) { if (isec == this) continue; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index bc24922598fe..3d2e73071d09 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -493,9 +493,6 @@ private: template <class ELFT> class DynamicSection final : public SyntheticSection { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - // finalizeContents() fills this vector with the section contents. - std::vector<std::pair<int32_t, std::function<uint64_t()>>> entries; - public: DynamicSection(); void finalizeContents() override; @@ -503,14 +500,7 @@ public: size_t getSize() const override { return size; } private: - void add(int32_t tag, std::function<uint64_t()> fn); - void addInt(int32_t tag, uint64_t val); - void addInSec(int32_t tag, InputSection *sec); - void addInSecRelative(int32_t tag, InputSection *sec); - void addOutSec(int32_t tag, OutputSection *sec); - void addSize(int32_t tag, OutputSection *sec); - void addSym(int32_t tag, Symbol *sym); - + std::vector<std::pair<int32_t, uint64_t>> computeContents(); uint64_t size = 0; }; @@ -685,9 +675,6 @@ private: // See the comment in writeBloomFilter. enum { Shift2 = 26 }; - void writeBloomFilter(uint8_t *buf); - void writeHashTable(uint8_t *buf); - struct Entry { Symbol *sym; size_t strTabOffset; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index dbc476ffeeb7..ffbc8d94a800 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -399,24 +399,6 @@ public: } }; -// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte -// alignment. This gives a possible 26 bits of 'reach'. If the caller and -// callee do not use toc and the call offset is larger than 26 bits, -// we need to emit a pc-rel based long-branch thunk. The target address of -// the callee is computed with a PC-relative offset. 
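The 26 bits of 'reach' mentioned in the removed comment above is simply the 24-bit signed bl offset scaled by the 4-byte instruction alignment, i.e. roughly +/-32 MiB; calls farther than that need one of the long-branch thunks.

#include <cstdint>
// 24-bit signed immediate, scaled by 4: +/- 2^25 bytes of reach.
constexpr int64_t blReach = (int64_t(1) << 23) * 4;
static_assert(blReach == 0x2000000, "calls beyond +/-32 MiB need a thunk");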
-class PPC64PCRelLongBranchThunk final : public Thunk { -public: - PPC64PCRelLongBranchThunk(Symbol &dest, int64_t addend) - : Thunk(dest, addend) { - alignment = 16; - } - uint32_t size() override { return 32; } - void writeTo(uint8_t *buf) override; - void addSymbols(ThunkSection &isec) override; - bool isCompatibleWith(const InputSection &isec, - const Relocation &rel) const override; -}; - } // end anonymous namespace Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value, @@ -932,7 +914,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) { write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset> } else if (isInt<34>(offset)) { int nextInstOffset; - if (!config->Power10Stub) { + if (!config->power10Stubs) { uint64_t tocOffset = destination.getVA() - getPPC64TocBase(); if (tocOffset >> 16 > 0) { const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff); @@ -980,7 +962,7 @@ void PPC64R12SetupStub::writeTo(uint8_t *buf) { reportRangeError(buf, offset, 34, destination, "R12 setup stub offset"); int nextInstOffset; - if (!config->Power10Stub) { + if (!config->power10Stubs) { uint32_t off = destination.getVA(addend) - getThunkTargetSym()->getVA() - 8; write32(buf + 0, 0x7c0802a6); // mflr r12 write32(buf + 4, 0x429f0005); // bcl 20,31,.+4 @@ -1013,7 +995,7 @@ void PPC64PCRelPLTStub::writeTo(uint8_t *buf) { int nextInstOffset = 0; int64_t offset = destination.getGotPltVA() - getThunkTargetSym()->getVA(); - if (config->Power10Stub) { + if (config->power10Stubs) { if (!isInt<34>(offset)) reportRangeError(buf, offset, 34, destination, "PC-relative PLT stub offset"); @@ -1061,42 +1043,6 @@ bool PPC64LongBranchThunk::isCompatibleWith(const InputSection &isec, return rel.type == R_PPC64_REL24 || rel.type == R_PPC64_REL14; } -void PPC64PCRelLongBranchThunk::writeTo(uint8_t *buf) { - int64_t offset = destination.getVA() - getThunkTargetSym()->getVA(); - if (!isInt<34>(offset)) - reportRangeError(buf, offset, 34, destination, - "PC-relative long branch stub offset"); - - int nextInstOffset; - if (!config->Power10Stub) { - uint32_t off = destination.getVA(addend) - getThunkTargetSym()->getVA() - 8; - write32(buf + 0, 0x7c0802a6); // mflr r12 - write32(buf + 4, 0x429f0005); // bcl 20,31,.+4 - write32(buf + 8, 0x7d6802a6); // mflr r11 - write32(buf + 12, 0x7d8803a6); // mtlr r12 - write32(buf + 16, 0x3d8b0000 | computeHiBits(off)); // addis r12,r11,off@ha - write32(buf + 20, 0x398c0000 | (off & 0xffff)); // addi r12,r12,off@l - nextInstOffset = 24; - } else { - uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) | - (offset & 0xffff); - writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1 - nextInstOffset = 8; - } - write32(buf + nextInstOffset, MTCTR_R12); // mtctr r12 - write32(buf + nextInstOffset + 4, BCTR); // bctr -} - -void PPC64PCRelLongBranchThunk::addSymbols(ThunkSection &isec) { - addSymbol(saver.save("__long_branch_pcrel_" + destination.getName()), - STT_FUNC, 0, isec); -} - -bool PPC64PCRelLongBranchThunk::isCompatibleWith(const InputSection &isec, - const Relocation &rel) const { - return rel.type == R_PPC64_REL24_NOTOC; -} - Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {} Thunk::~Thunk() = default; @@ -1223,9 +1169,7 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) { return make<PPC64R2SaveStub>(s, a); if (type == R_PPC64_REL24_NOTOC) - return (s.stOther >> 5) > 1 - ? 
(Thunk *)make<PPC64R12SetupStub>(s) - : (Thunk *)make<PPC64PCRelLongBranchThunk>(s, a); + return make<PPC64R12SetupStub>(s); if (config->picThunk) return make<PPC64PILongBranchThunk>(s, a); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6d97852aec43..07c5e2303374 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -91,67 +91,6 @@ private: }; } // anonymous namespace -static bool isSectionPrefix(StringRef prefix, StringRef name) { - return name.startswith(prefix) || name == prefix.drop_back(); -} - -StringRef elf::getOutputSectionName(const InputSectionBase *s) { - if (config->relocatable) - return s->name; - - // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want - // to emit .rela.text.foo as .rela.text.bar for consistency (this is not - // technically required, but not doing it is odd). This code guarantees that. - if (auto *isec = dyn_cast<InputSection>(s)) { - if (InputSectionBase *rel = isec->getRelocatedSection()) { - OutputSection *out = rel->getOutputSection(); - if (s->type == SHT_RELA) - return saver.save(".rela" + out->name); - return saver.save(".rel" + out->name); - } - } - - // A BssSection created for a common symbol is identified as "COMMON" in - // linker scripts. It should go to .bss section. - if (s->name == "COMMON") - return ".bss"; - - if (script->hasSectionsCommand) - return s->name; - - // When no SECTIONS is specified, emulate GNU ld's internal linker scripts - // by grouping sections with certain prefixes. - - // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.", - // ".text.unlikely.", ".text.startup." or ".text.exit." before others. - // We provide an option -z keep-text-section-prefix to group such sections - // into separate output sections. This is more flexible. See also - // sortISDBySectionOrder(). - // ".text.unknown" means the hotness of the section is unknown. When - // SampleFDO is used, if a function doesn't have sample, it could be very - // cold or it could be a new function never being sampled. Those functions - // will be kept in the ".text.unknown" section. - // ".text.split." holds symbols which are split out from functions in other - // input sections. For example, with -fsplit-machine-functions, placing the - // cold parts in .text.split instead of .text.unlikely mitigates against poor - // profile inaccuracy. Techniques such as hugepage remapping can make - // conservative decisions at the section granularity. 
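To make the prefix grouping described above concrete before the prefix loops that follow, here is a standalone restatement of the matching rule the removed isSectionPrefix implements (using std::string_view instead of llvm::StringRef), with a few illustrative section names.

#include <string_view>

// A name matches a prefix if it starts with it, or equals the prefix with its
// trailing '.' dropped, so ".text" and ".text.cold" both fold into ".text.".
static bool matchesPrefix(std::string_view prefix, std::string_view name) {
  return name.substr(0, prefix.size()) == prefix ||
         name == prefix.substr(0, prefix.size() - 1);
}
// matchesPrefix(".text.", ".text")            -> true
// matchesPrefix(".text.", ".text.hot.foo")    -> true
// matchesPrefix(".rodata.", ".rodata.str1.1") -> true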
- if (config->zKeepTextSectionPrefix) - for (StringRef v : {".text.hot.", ".text.unknown.", ".text.unlikely.", - ".text.startup.", ".text.exit.", ".text.split."}) - if (isSectionPrefix(v, s->name)) - return v.drop_back(); - - for (StringRef v : - {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", - ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", - ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) - if (isSectionPrefix(v, s->name)) - return v.drop_back(); - - return s->name; -} - static bool needsInterpSection() { return !config->relocatable && !config->shared && !config->dynamicLinker.empty() && script->needsInterpSection(); @@ -332,8 +271,8 @@ void elf::addReservedSymbols() { } static OutputSection *findSection(StringRef name, unsigned partition = 1) { - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : script->sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) if (sec->name == name && sec->partition == partition) return sec; return nullptr; @@ -342,7 +281,10 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) { template <class ELFT> void elf::createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. - memset(&Out::first, 0, sizeof(Out)); + Out::tlsPhdr = nullptr; + Out::preinitArray = nullptr; + Out::initArray = nullptr; + Out::finiArray = nullptr; // Add the .interp section first because it is not a SyntheticSection. // The removeUnusedSyntheticSections() function relies on the @@ -426,7 +368,6 @@ template <class ELFT> void elf::createSyntheticSections() { make<RelocationSection<ELFT>>(relaDynName, config->zCombreloc); if (config->hasDynSymTab) { - part.dynSymTab = make<SymbolTableSection<ELFT>>(*part.dynStrTab); add(part.dynSymTab); part.verSym = make<VersionTableSection>(); @@ -624,9 +565,8 @@ template <class ELFT> void Writer<ELFT>::run() { // --print-archive-stats=. Dump them before checkSections() because the files // may be useful in case checkSections() or openFile() fails, for example, due // to an erroneous file size. - writeMapFile(); + writeMapAndCref(); writeWhyExtract(); - writeCrossReferenceTable(); writeArchiveStats(); if (config->checkSections) @@ -787,16 +727,16 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { // referring to a section (that happens if the section is a synthetic one), we // don't create a section symbol for that section. 
template <class ELFT> void Writer<ELFT>::addSectionSymbols() { - for (BaseCommand *base : script->sectionCommands) { - auto *sec = dyn_cast<OutputSection>(base); + for (SectionCommand *cmd : script->sectionCommands) { + auto *sec = dyn_cast<OutputSection>(cmd); if (!sec) continue; - auto i = llvm::find_if(sec->sectionCommands, [](BaseCommand *base) { - if (auto *isd = dyn_cast<InputSectionDescription>(base)) + auto i = llvm::find_if(sec->commands, [](SectionCommand *cmd) { + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) return !isd->sections.empty(); return false; }); - if (i == sec->sectionCommands.end()) + if (i == sec->commands.end()) continue; InputSectionBase *isec = cast<InputSectionDescription>(*i)->sections[0]; @@ -1053,7 +993,8 @@ static unsigned getSectionRank(const OutputSection *sec) { return rank; } -static bool compareSections(const BaseCommand *aCmd, const BaseCommand *bCmd) { +static bool compareSections(const SectionCommand *aCmd, + const SectionCommand *bCmd) { const OutputSection *a = cast<OutputSection>(aCmd); const OutputSection *b = cast<OutputSection>(bCmd); @@ -1210,7 +1151,7 @@ static int getRankProximityAux(OutputSection *a, OutputSection *b) { return countLeadingZeros(a->sortRank ^ b->sortRank); } -static int getRankProximity(OutputSection *a, BaseCommand *b) { +static int getRankProximity(OutputSection *a, SectionCommand *b) { auto *sec = dyn_cast<OutputSection>(b); return (sec && sec->hasInputSections) ? getRankProximityAux(a, sec) : -1; } @@ -1229,7 +1170,7 @@ static int getRankProximity(OutputSection *a, BaseCommand *b) { // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. -static bool shouldSkip(BaseCommand *cmd) { +static bool shouldSkip(SectionCommand *cmd) { if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) return assign->name != "."; return false; @@ -1238,13 +1179,13 @@ static bool shouldSkip(BaseCommand *cmd) { // We want to place orphan sections so that they share as much // characteristics with their neighbors as possible. For example, if // both are rw, or both are tls. -static std::vector<BaseCommand *>::iterator -findOrphanPos(std::vector<BaseCommand *>::iterator b, - std::vector<BaseCommand *>::iterator e) { +static std::vector<SectionCommand *>::iterator +findOrphanPos(std::vector<SectionCommand *>::iterator b, + std::vector<SectionCommand *>::iterator e) { OutputSection *sec = cast<OutputSection>(*e); // Find the first element that has as close a rank as possible. - auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) { + auto i = std::max_element(b, e, [=](SectionCommand *a, SectionCommand *b) { return getRankProximity(sec, a) < getRankProximity(sec, b); }); if (i == e) @@ -1273,7 +1214,7 @@ findOrphanPos(std::vector<BaseCommand *>::iterator b, break; } - auto isOutputSecWithInputSections = [](BaseCommand *cmd) { + auto isOutputSecWithInputSections = [](SectionCommand *cmd) { auto *os = dyn_cast<OutputSection>(cmd); return os && os->hasInputSections; }; @@ -1482,7 +1423,7 @@ static void sortSection(OutputSection *sec, // digit radix sort. The sections may be sorted stably again by a more // significant key. if (!order.empty()) - for (BaseCommand *b : sec->sectionCommands) + for (SectionCommand *b : sec->commands) if (auto *isd = dyn_cast<InputSectionDescription>(b)) sortISDBySectionOrder(isd, order); @@ -1499,8 +1440,8 @@ static void sortSection(OutputSection *sec, // addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations. 
// To reduce the risk of relocation overflow, .toc contents are sorted so // that sections having smaller relocation offsets are at beginning of .toc - assert(sec->sectionCommands.size() == 1); - auto *isd = cast<InputSectionDescription>(sec->sectionCommands[0]); + assert(sec->commands.size() == 1); + auto *isd = cast<InputSectionDescription>(sec->commands[0]); llvm::stable_sort(isd->sections, [](const InputSection *a, const InputSection *b) -> bool { return a->file->ppc64SmallCodeModelTocRelocs && @@ -1515,8 +1456,8 @@ template <class ELFT> void Writer<ELFT>::sortInputSections() { // Build the order once since it is expensive. DenseMap<const InputSectionBase *, int> order = buildSectionOrder(); maybeShuffle(order); - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : script->sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) sortSection(sec, order); } @@ -1531,8 +1472,8 @@ template <class ELFT> void Writer<ELFT>::sortSections() { sortInputSections(); - for (BaseCommand *base : script->sectionCommands) { - auto *os = dyn_cast<OutputSection>(base); + for (SectionCommand *cmd : script->sectionCommands) { + auto *os = dyn_cast<OutputSection>(cmd); if (!os) continue; os->sortRank = getSectionRank(os); @@ -1547,7 +1488,9 @@ template <class ELFT> void Writer<ELFT>::sortSections() { if (!script->hasSectionsCommand) { // We know that all the OutputSections are contiguous in this case. - auto isSection = [](BaseCommand *base) { return isa<OutputSection>(base); }; + auto isSection = [](SectionCommand *cmd) { + return isa<OutputSection>(cmd); + }; std::stable_sort( llvm::find_if(script->sectionCommands, isSection), llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(), @@ -1602,8 +1545,8 @@ template <class ELFT> void Writer<ELFT>::sortSections() { auto i = script->sectionCommands.begin(); auto e = script->sectionCommands.end(); - auto nonScriptI = std::find_if(i, e, [](BaseCommand *base) { - if (auto *sec = dyn_cast<OutputSection>(base)) + auto nonScriptI = std::find_if(i, e, [](SectionCommand *cmd) { + if (auto *sec = dyn_cast<OutputSection>(cmd)) return sec->sectionIndex == UINT32_MAX; return false; }); @@ -1616,7 +1559,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() { // the script with ". = 0xabcd" and the expectation is that every section is // after that. auto firstSectionOrDotAssignment = - std::find_if(i, e, [](BaseCommand *cmd) { return !shouldSkip(cmd); }); + std::find_if(i, e, [](SectionCommand *cmd) { return !shouldSkip(cmd); }); if (firstSectionOrDotAssignment != e && isa<SymbolAssignment>(**firstSectionOrDotAssignment)) ++firstSectionOrDotAssignment; @@ -1629,7 +1572,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() { // As an optimization, find all sections with the same sort rank // and insert them with one rotate. unsigned rank = orphan->sortRank; - auto end = std::find_if(nonScriptI + 1, e, [=](BaseCommand *cmd) { + auto end = std::find_if(nonScriptI + 1, e, [=](SectionCommand *cmd) { return cast<OutputSection>(cmd)->sortRank != rank; }); std::rotate(pos, nonScriptI, end); @@ -1670,8 +1613,8 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { // Sorting is performed separately. 
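One non-obvious detail in the orphan-placement code above: getRankProximity measures how similar two sections' sort ranks are by counting the leading zero bits of their XOR, so ranks that agree in more significant flag bits count as "closer". A tiny self-contained check with made-up 8-bit ranks (C++20 <bit>):

#include <bit>
#include <cstdint>

constexpr uint8_t a = 0b1011'0100, b = 0b1011'0001, c = 0b0011'0100;
static_assert(std::countl_zero(uint8_t(a ^ b)) == 5); // shares the top 5 bits: close
static_assert(std::countl_zero(uint8_t(a ^ c)) == 0); // differs in the top bit: far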
std::vector<InputSection **> scriptSections; std::vector<InputSection *> sections; - for (BaseCommand *base : sec->sectionCommands) { - auto *isd = dyn_cast<InputSectionDescription>(base); + for (SectionCommand *cmd : sec->commands) { + auto *isd = dyn_cast<InputSectionDescription>(cmd); if (!isd) continue; bool hasLinkOrder = false; @@ -1774,7 +1717,7 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { // If addrExpr is set, the address may not be a multiple of the alignment. // Warn because this is error-prone. - for (BaseCommand *cmd : script->sectionCommands) + for (SectionCommand *cmd : script->sectionCommands) if (auto *os = dyn_cast<OutputSection>(cmd)) if (os->addr % os->alignment != 0) warn("address (0x" + Twine::utohexstr(os->addr) + ") of section " + @@ -1892,36 +1835,30 @@ static void removeUnusedSyntheticSections() { }) .base(); - DenseSet<InputSectionDescription *> isdSet; - // Mark unused synthetic sections for deletion - auto end = std::stable_partition( - start, inputSections.end(), [&](InputSectionBase *s) { - SyntheticSection *ss = dyn_cast<SyntheticSection>(s); - OutputSection *os = ss->getParent(); - if (!os || ss->isNeeded()) - return true; - - // If we reach here, then ss is an unused synthetic section and we want - // to remove it from the corresponding input section description, and - // orphanSections. - for (BaseCommand *b : os->sectionCommands) - if (auto *isd = dyn_cast<InputSectionDescription>(b)) - isdSet.insert(isd); - - llvm::erase_if( - script->orphanSections, - [=](const InputSectionBase *isec) { return isec == ss; }); - - return false; + // Remove unused synthetic sections from inputSections; + DenseSet<InputSectionBase *> unused; + auto end = + std::remove_if(start, inputSections.end(), [&](InputSectionBase *s) { + auto *sec = cast<SyntheticSection>(s); + if (sec->getParent() && sec->isNeeded()) + return false; + unused.insert(sec); + return true; }); - - DenseSet<InputSectionBase *> unused(end, inputSections.end()); - for (auto *isd : isdSet) - llvm::erase_if(isd->sections, - [=](InputSection *isec) { return unused.count(isec); }); - - // Erase unused synthetic sections. inputSections.erase(end, inputSections.end()); + + // Remove unused synthetic sections from the corresponding input section + // description and orphanSections. + for (auto *sec : unused) + if (OutputSection *osec = cast<SyntheticSection>(sec)->getParent()) + for (SectionCommand *cmd : osec->commands) + if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) + llvm::erase_if(isd->sections, [&](InputSection *isec) { + return unused.count(isec); + }); + llvm::erase_if(script->orphanSections, [&](const InputSectionBase *sec) { + return unused.count(sec); + }); } // Create output section objects and add them to OutputSections. @@ -1935,8 +1872,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // addresses of each section by section name. Add such symbols. if (!config->relocatable) { addStartEndSymbols(); - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) + for (SectionCommand *cmd : script->sectionCommands) + if (auto *sec = dyn_cast<OutputSection>(cmd)) addStartStopSymbols(sec); } @@ -2087,11 +2024,14 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { sortSections(); - // Now that we have the final list, create a list of all the - // OutputSections for convenience. 
- for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast<OutputSection>(base)) - outputSections.push_back(sec); + // Create a list of OutputSections, assign sectionIndex, and populate + // in.shStrTab. + for (SectionCommand *cmd : script->sectionCommands) + if (auto *osec = dyn_cast<OutputSection>(cmd)) { + outputSections.push_back(osec); + osec->sectionIndex = outputSections.size(); + osec->shName = in.shStrTab->addString(osec->name); + } // Prefer command line supplied address over other constraints. for (OutputSection *sec : outputSections) { @@ -2113,12 +2053,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // to 1 to make __ehdr_start defined. The section number is not // particularly relevant. Out::elfHeader->sectionIndex = 1; - - for (size_t i = 0, e = outputSections.size(); i != e; ++i) { - OutputSection *sec = outputSections[i]; - sec->sectionIndex = i + 1; - sec->shName = in.shStrTab->addString(sec->name); - } + Out::elfHeader->size = sizeof(typename ELFT::Ehdr); // Binary and relocatable output does not have PHDRS. // The headers have to be created before finalize as that can influence the @@ -2608,17 +2543,6 @@ static uint64_t computeFileOffset(OutputSection *os, uint64_t off) { return first->offset + os->addr - first->addr; } -// Set an in-file position to a given section and returns the end position of -// the section. -static uint64_t setFileOffset(OutputSection *os, uint64_t off) { - off = computeFileOffset(os, off); - os->offset = off; - - if (os->type == SHT_NOBITS) - return off; - return off + os->size; -} - template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() { // Compute the minimum LMA of all non-empty non-NOBITS sections as minAddr. auto needsOffset = [](OutputSection &sec) { @@ -2646,9 +2570,8 @@ static std::string rangeToString(uint64_t addr, uint64_t len) { // Assign file offsets to output sections. template <class ELFT> void Writer<ELFT>::assignFileOffsets() { - uint64_t off = 0; - off = setFileOffset(Out::elfHeader, off); - off = setFileOffset(Out::programHeaders, off); + Out::programHeaders->offset = Out::elfHeader->size; + uint64_t off = Out::elfHeader->size + Out::programHeaders->size; PhdrEntry *lastRX = nullptr; for (Partition &part : partitions) @@ -2661,18 +2584,23 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() { for (OutputSection *sec : outputSections) { if (!(sec->flags & SHF_ALLOC)) continue; - off = setFileOffset(sec, off); + off = computeFileOffset(sec, off); + sec->offset = off; + if (sec->type != SHT_NOBITS) + off += sec->size; // If this is a last section of the last executable segment and that // segment is the last loadable segment, align the offset of the // following section to avoid loading non-segments parts of the file. 
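For the file-offset assignment above, computeFileOffset keeps an allocatable section's offset at the same distance from the start of its PT_LOAD as its virtual address, which is what keeps file offset and VA congruent modulo the page size. With purely illustrative numbers:

#include <cstdint>

// First section of the PT_LOAD: file offset 0x1000, VA 0x401000.
constexpr uint64_t firstOff = 0x1000, firstAddr = 0x401000;
// A later section in the same segment at VA 0x403240:
constexpr uint64_t secAddr = 0x403240;
constexpr uint64_t secOff = firstOff + secAddr - firstAddr; // = 0x3240
static_assert(secOff == 0x3240, "offset tracks the VA within the segment");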
if (config->zSeparate != SeparateSegmentKind::None && lastRX && lastRX->lastSec == sec) - off = alignTo(off, config->commonPageSize); + off = alignTo(off, config->maxPageSize); } - for (OutputSection *sec : outputSections) - if (!(sec->flags & SHF_ALLOC)) - off = setFileOffset(sec, off); + for (OutputSection *osec : outputSections) + if (!(osec->flags & SHF_ALLOC)) { + osec->offset = alignTo(off, osec->alignment); + off = osec->offset + osec->size; + } sectionHeaderOff = alignTo(off, config->wordsize); fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr); @@ -2946,9 +2874,9 @@ template <class ELFT> void Writer<ELFT>::writeTrapInstr() { for (PhdrEntry *p : part.phdrs) if (p->p_type == PT_LOAD && (p->p_flags & PF_X)) fillTrap(Out::bufferStart + alignDown(p->firstSec->offset + p->p_filesz, - config->commonPageSize), + config->maxPageSize), Out::bufferStart + alignTo(p->firstSec->offset + p->p_filesz, - config->commonPageSize)); + config->maxPageSize)); // Round up the file size of the last segment to the page boundary iff it is // an executable segment to ensure that other tools don't accidentally @@ -2960,7 +2888,7 @@ template <class ELFT> void Writer<ELFT>::writeTrapInstr() { if (last && (last->p_flags & PF_X)) last->p_memsz = last->p_filesz = - alignTo(last->p_filesz, config->commonPageSize); + alignTo(last->p_filesz, config->maxPageSize); } } diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h index 3698544d977b..9c4a5b98451d 100644 --- a/lld/ELF/Writer.h +++ b/lld/ELF/Writer.h @@ -51,7 +51,6 @@ struct PhdrEntry { }; void addReservedSymbols(); -llvm::StringRef getOutputSectionName(const InputSectionBase *s); template <class ELFT> uint32_t calcMipsEFlags(); diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 96167b72a724..d42085737dbb 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -26,8 +26,11 @@ using namespace llvm::support; using namespace lld; using namespace lld::macho; -// Verify ConcatInputSection's size on 64-bit builds. -static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == 120, +// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector +// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), +// so account for that. +static_assert(sizeof(void *) != 8 || + sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 96, "Try to minimize ConcatInputSection's size, we create many " "instances of it"); diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index cc6f51cc5fd3..d1182a0a2d32 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -236,7 +236,12 @@ public: uint64_t getVA() const override; bool isWeakDef() const override { return weakDef; } - bool isWeakRef() const override { return refState == RefState::Weak; } + + // Symbols from weak libraries/frameworks are also weakly-referenced. 
+ bool isWeakRef() const override { + return refState == RefState::Weak || + (file && getFile()->umbrella->forceWeakImport); + } bool isReferenced() const { return refState != RefState::Unreferenced; } bool isTlv() const override { return tlv; } bool isDynamicLookup() const { return file == nullptr; } diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 2527389990fa..99a15666c8fa 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -796,16 +796,18 @@ FunctionStartsSection::FunctionStartsSection() void FunctionStartsSection::finalizeContents() { raw_svector_ostream os{contents}; std::vector<uint64_t> addrs; - for (const Symbol *sym : symtab->getSymbols()) { - if (const auto *defined = dyn_cast<Defined>(sym)) { - if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive()) - continue; - if (const auto *concatIsec = dyn_cast<ConcatInputSection>(defined->isec)) - if (concatIsec->shouldOmitFromOutput()) - continue; - // TODO: Add support for thumbs, in that case - // the lowest bit of nextAddr needs to be set to 1. - addrs.push_back(defined->getVA()); + for (const InputFile *file : inputFiles) { + if (auto *objFile = dyn_cast<ObjFile>(file)) { + for (const Symbol *sym : objFile->symbols) { + if (const auto *defined = dyn_cast_or_null<Defined>(sym)) { + if (!defined->isec || !isCodeSection(defined->isec) || + !defined->isLive()) + continue; + // TODO: Add support for thumbs, in that case + // the lowest bit of nextAddr needs to be set to 1. + addrs.push_back(defined->getVA()); + } + } } } llvm::sort(addrs); diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index 690098c7a3b7..d28c7a33ff36 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -225,8 +225,9 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) { // // (See discussions/alternatives already considered on D107533) if (!defined->isExternal()) - if (const Symbol *sym = symtab->find(defined->getName())) - r.referent = s = const_cast<Symbol *>(sym); + if (Symbol *sym = symtab->find(defined->getName())) + if (sym->kind() != Symbol::LazyKind) + r.referent = s = sym; } if (auto *undefined = dyn_cast<Undefined>(s)) { treatUndefinedSymbol(*undefined); diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 1d53177200c3..a2456fc46689 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -31,11 +31,16 @@ ELF Improvements * ``e_entry`` no longer falls back to the address of ``.text`` if the entry symbol does not exist. Instead, a value of 0 will be written. (`D110014 <https://reviews.llvm.org/D110014>`_) +* If ``-Map`` is specified, ``--cref`` will be printted to the specified file. + (`D114663 <https://reviews.llvm.org/D114663>`_) Architecture specific changes: * The x86-32 port now supports TLSDESC (``-mtls-dialect=gnu2``). (`D112582 <https://reviews.llvm.org/D112582>`_) +* The x86-64 port now handles non-RAX/non-adjacent ``R_X86_64_GOTPC32_TLSDESC`` + and ``R_X86_64_TLSDESC_CALL`` (``-mtls-dialect=gnu2``). + (`D114416 <https://reviews.llvm.org/D114416>`_) * For x86-64, ``--no-relax`` now suppresses ``R_X86_64_GOTPCRELX`` and ``R_X86_64_REX_GOTPCRELX`` GOT optimization (`D113615 <https://reviews.llvm.org/D113615>`_) diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 843f4a1cc282..0422231d78b5 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -141,7 +141,9 @@ you can specify .Fl O2 to set the compression level to 6. 
.It Fl -cref -Output cross reference table. +Output cross reference table. If +.Fl Map +is specified, print to the map file. .It Fl -define-common , Fl d Assign space to common symbols. .It Fl -defsym Ns = Ns Ar symbol Ns = Ns Ar expression @@ -454,6 +456,20 @@ is specified, use SHT_ANDROID_RELR instead of SHT_RELR. Always generate position independent thunks. .It Fl -pie , Fl -pic-executable Create a position independent executable. +.It Fl -power10-stubs Ns = Ns Cm mode +Whether to use Power10 instructions in call stubs for R_PPC64_REL24_NOTOC and TOC/NOTOC interworking. +.Ar mode +may be: +.Pp +.Bl -tag -width 2n -compact +.It Cm yes +(default) Use. +.It Cm auto +Currently the same as yes. +.It Cm no +Don't use. +.El + .It Fl -print-gc-sections List removed unused sections. .It Fl -print-icf-sections diff --git a/lldb/bindings/interface/SBDebugger.i b/lldb/bindings/interface/SBDebugger.i index aae72dd51394..f21e60d62873 100644 --- a/lldb/bindings/interface/SBDebugger.i +++ b/lldb/bindings/interface/SBDebugger.i @@ -207,6 +207,9 @@ public: } SBError + SetInputString (const char* data); + + SBError SetInputFile (SBFile file); SBError diff --git a/lldb/bindings/interface/SBTarget.i b/lldb/bindings/interface/SBTarget.i index 3f9e4cdc6d67..b98aa70849be 100644 --- a/lldb/bindings/interface/SBTarget.i +++ b/lldb/bindings/interface/SBTarget.i @@ -412,6 +412,9 @@ public: uint32_t GetCodeByteSize (); + uint32_t + GetMaximumNumberOfChildrenToDisplay() const; + lldb::SBError SetSectionLoadAddress (lldb::SBSection section, lldb::addr_t section_base_addr); diff --git a/lldb/bindings/interface/SBValue.i b/lldb/bindings/interface/SBValue.i index dd012e667a20..bc66a4ae28f8 100644 --- a/lldb/bindings/interface/SBValue.i +++ b/lldb/bindings/interface/SBValue.i @@ -410,6 +410,9 @@ public: bool SetData (lldb::SBData &data, lldb::SBError& error); + lldb::SBValue + Clone(const char *new_name); + lldb::addr_t GetLoadAddress(); diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index aa2bcfb8c8ae..fdd3b4e62c10 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -1,23 +1,14 @@ +namespace lldb_private { +namespace python { + PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) { return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBProcess &process_sb) { - return SWIG_NewPointerObj(&process_sb, SWIGTYPE_p_lldb__SBProcess, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBThread &thread_sb) { return SWIG_NewPointerObj(&thread_sb, SWIGTYPE_p_lldb__SBThread, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBThreadPlan &thread_plan_sb) { - return SWIG_NewPointerObj(&thread_plan_sb, SWIGTYPE_p_lldb__SBThreadPlan, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBTarget &target_sb) { - return SWIG_NewPointerObj(&target_sb, SWIGTYPE_p_lldb__SBTarget, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBFrame &frame_sb) { return SWIG_NewPointerObj(&frame_sb, SWIGTYPE_p_lldb__SBFrame, 0); } @@ -26,10 +17,6 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBDebugger &debugger_sb) { return SWIG_NewPointerObj(&debugger_sb, SWIGTYPE_p_lldb__SBDebugger, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBBreakpoint &breakpoint_sb) { - return SWIG_NewPointerObj(&breakpoint_sb, SWIGTYPE_p_lldb__SBBreakpoint, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBWatchpoint &watchpoint_sb) { return SWIG_NewPointerObj(&watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0); } @@ -40,10 +27,6 @@ 
SBTypeToSWIGWrapper(lldb::SBBreakpointLocation &breakpoint_location_sb) { SWIGTYPE_p_lldb__SBBreakpointLocation, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBValue &value_sb) { - return SWIG_NewPointerObj(&value_sb, SWIGTYPE_p_lldb__SBValue, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) { return SWIG_NewPointerObj(&cmd_ret_obj_sb, SWIGTYPE_p_lldb__SBCommandReturnObject, 0); @@ -70,3 +53,38 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBSymbolContext &sym_ctx_sb) { PyObject *SBTypeToSWIGWrapper(lldb::SBStream &stream_sb) { return SWIG_NewPointerObj(&stream_sb, SWIGTYPE_p_lldb__SBStream, 0); } + +PythonObject ToSWIGHelper(void *obj, swig_type_info *info) { + return {PyRefType::Owned, SWIG_NewPointerObj(obj, info, SWIG_POINTER_OWN)}; +} + +PythonObject ToSWIGWrapper(std::unique_ptr<lldb::SBValue> value_sb) { + return ToSWIGHelper(value_sb.release(), SWIGTYPE_p_lldb__SBValue); +} + +PythonObject ToSWIGWrapper(lldb::ValueObjectSP value_sp) { + return ToSWIGWrapper(std::make_unique<lldb::SBValue>(std::move(value_sp))); +} + +PythonObject ToSWIGWrapper(lldb::TargetSP target_sp) { + return ToSWIGHelper(new lldb::SBTarget(std::move(target_sp)), + SWIGTYPE_p_lldb__SBTarget); +} + +PythonObject ToSWIGWrapper(lldb::ProcessSP process_sp) { + return ToSWIGHelper(new lldb::SBProcess(std::move(process_sp)), + SWIGTYPE_p_lldb__SBProcess); +} + +PythonObject ToSWIGWrapper(lldb::ThreadPlanSP thread_plan_sp) { + return ToSWIGHelper(new lldb::SBThreadPlan(std::move(thread_plan_sp)), + SWIGTYPE_p_lldb__SBThreadPlan); +} + +PythonObject ToSWIGWrapper(lldb::BreakpointSP breakpoint_sp) { + return ToSWIGHelper(new lldb::SBBreakpoint(std::move(breakpoint_sp)), + SWIGTYPE_p_lldb__SBBreakpoint); +} + +} // namespace python +} // namespace lldb_private diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 6dc8ca170390..079f8d12dafa 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -22,32 +22,8 @@ private: bool m_print; }; -%} - -%wrapper %{ - -// resolve a dotted Python name in the form -// foo.bar.baz.Foobar to an actual Python object -// if pmodule is NULL, the __main__ module will be used -// as the starting point for the search - - -// This function is called by lldb_private::ScriptInterpreterPython::BreakpointCallbackFunction(...) -// and is used when a script command is attached to a breakpoint for execution. - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreturn-type-c-linkage" - -// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has -// C-linkage specified, but returns UDT 'llvm::Expected<bool>' which is -// incompatible with C -#if _MSC_VER -#pragma warning (push) -#pragma warning (disable : 4190) -#endif - -SWIGEXPORT llvm::Expected<bool> -LLDBSwigPythonBreakpointCallbackFunction +llvm::Expected<bool> +lldb_private::LLDBSwigPythonBreakpointCallbackFunction ( const char *python_function_name, const char *session_dictionary_name, @@ -93,17 +69,20 @@ LLDBSwigPythonBreakpointCallbackFunction return result.get().get() != Py_False; } -#if _MSC_VER -#pragma warning (pop) -#endif +// resolve a dotted Python name in the form +// foo.bar.baz.Foobar to an actual Python object +// if pmodule is NULL, the __main__ module will be used +// as the starting point for the search -#pragma clang diagnostic pop + +// This function is called by lldb_private::ScriptInterpreterPython::BreakpointCallbackFunction(...) 
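A brief note on the ToSWIGHelper/ToSWIGWrapper additions above: the old pattern wrapped a new-ed SB object with ownership flag 0, so Python never deleted it (hence the removed "FIXME: ... leaked here" comments), whereas SWIG_POINTER_OWN lets the generated wrapper delete the SB object when the Python reference dies. A sketch of the pattern, assuming the same PythonObject/PyRefType types used above; the function name is made up.

PythonObject wrapValue(lldb::ValueObjectSP sp) {
  // The unique_ptr releases into a wrapper created with SWIG_POINTER_OWN,
  // so Python, not C++, is now responsible for deleting the SBValue.
  auto sb = std::make_unique<lldb::SBValue>(std::move(sp));
  return {PyRefType::Owned,
          SWIG_NewPointerObj(sb.release(), SWIGTYPE_p_lldb__SBValue,
                             SWIG_POINTER_OWN)};
}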
+// and is used when a script command is attached to a breakpoint for execution. // This function is called by lldb_private::ScriptInterpreterPython::WatchpointCallbackFunction(...) // and is used when a script command is attached to a watchpoint for execution. -SWIGEXPORT bool -LLDBSwigPythonWatchpointCallbackFunction +bool +lldb_private::LLDBSwigPythonWatchpointCallbackFunction ( const char *python_function_name, const char *session_dictionary_name, @@ -134,8 +113,8 @@ LLDBSwigPythonWatchpointCallbackFunction return stop_at_watchpoint; } -SWIGEXPORT bool -LLDBSwigPythonCallTypeScript +bool +lldb_private::LLDBSwigPythonCallTypeScript ( const char *python_function_name, const void *session_dictionary, @@ -145,7 +124,6 @@ LLDBSwigPythonCallTypeScript std::string& retval ) { - lldb::SBValue sb_value (valobj_sp); lldb::SBTypeSummaryOptions sb_options(options_sp.get()); retval.clear(); @@ -195,7 +173,7 @@ LLDBSwigPythonCallTypeScript return false; } - PythonObject value_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_value)); + PythonObject value_arg = ToSWIGWrapper(valobj_sp); PythonObject options_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_options)); if (argc.get().max_positional_args < 3) @@ -208,8 +186,8 @@ LLDBSwigPythonCallTypeScript return true; } -SWIGEXPORT void* -LLDBSwigPythonCreateSyntheticProvider +void* +lldb_private::LLDBSwigPythonCreateSyntheticProvider ( const char *python_class_name, const char *session_dictionary_name, @@ -227,11 +205,10 @@ LLDBSwigPythonCreateSyntheticProvider if (!pfunc.IsAllocated()) Py_RETURN_NONE; - // FIXME: SBValue leaked here - lldb::SBValue *sb_value = new lldb::SBValue(valobj_sp); + auto sb_value = std::make_unique<lldb::SBValue>(valobj_sp); sb_value->SetPreferSyntheticValue(false); - PythonObject val_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*sb_value)); + PythonObject val_arg = ToSWIGWrapper(std::move(sb_value)); if (!val_arg.IsAllocated()) Py_RETURN_NONE; @@ -243,8 +220,8 @@ LLDBSwigPythonCreateSyntheticProvider Py_RETURN_NONE; } -SWIGEXPORT void* -LLDBSwigPythonCreateCommandObject +void* +lldb_private::LLDBSwigPythonCreateCommandObject ( const char *python_class_name, const char *session_dictionary_name, @@ -271,8 +248,8 @@ LLDBSwigPythonCreateCommandObject Py_RETURN_NONE; } -SWIGEXPORT void* -LLDBSwigPythonCreateScriptedProcess +void* +lldb_private::LLDBSwigPythonCreateScriptedProcess ( const char *python_class_name, const char *session_dictionary_name, @@ -295,12 +272,7 @@ LLDBSwigPythonCreateScriptedProcess return nullptr; } - // FIXME: SBTarget leaked here - PythonObject target_arg( - PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBTarget(target_sp))); - - if (!target_arg.IsAllocated()) - Py_RETURN_NONE; + PythonObject target_arg = ToSWIGWrapper(target_sp); llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo(); if (!arg_info) { @@ -330,8 +302,8 @@ LLDBSwigPythonCreateScriptedProcess Py_RETURN_NONE; } -SWIGEXPORT void* -LLDBSwigPythonCreateScriptedThread +void* +lldb_private::LLDBSwigPythonCreateScriptedThread ( const char *python_class_name, const char *session_dictionary_name, @@ -354,14 +326,6 @@ LLDBSwigPythonCreateScriptedThread return nullptr; } - // FIXME: This leaks the SBProcess object - PythonObject process_arg( - PyRefType::Owned, - SBTypeToSWIGWrapper(*new lldb::SBProcess(process_sp))); - - if (!process_arg.IsAllocated()) - Py_RETURN_NONE; - llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo(); if (!arg_info) { llvm::handleAllErrors( @@ -379,7 +343,7 @@ LLDBSwigPythonCreateScriptedThread if 
(arg_info.get().max_positional_args == 2) { // FIXME: SBStructuredData leaked here PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBStructuredData(args_impl))); - result = pfunc(process_arg, args_arg); + result = pfunc(ToSWIGWrapper(process_sp), args_arg); } else { error_string.assign("wrong number of arguments in __init__, should be 2 (not including self)"); Py_RETURN_NONE; @@ -390,8 +354,8 @@ LLDBSwigPythonCreateScriptedThread Py_RETURN_NONE; } -SWIGEXPORT void* -LLDBSwigPythonCreateScriptedThreadPlan +void* +lldb_private::LLDBSwigPythonCreateScriptedThreadPlan ( const char *python_class_name, const char *session_dictionary_name, @@ -415,13 +379,7 @@ LLDBSwigPythonCreateScriptedThreadPlan return nullptr; } - // FIXME: SBThreadPlan leaked here - PythonObject tp_arg( - PyRefType::Owned, - SBTypeToSWIGWrapper(*new lldb::SBThreadPlan(thread_plan_sp))); - - if (!tp_arg.IsAllocated()) - Py_RETURN_NONE; + PythonObject tp_arg = ToSWIGWrapper(thread_plan_sp); llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo(); if (!arg_info) { @@ -460,8 +418,8 @@ LLDBSwigPythonCreateScriptedThreadPlan Py_RETURN_NONE; } -SWIGEXPORT bool -LLDBSWIGPythonCallThreadPlan +bool +lldb_private::LLDBSWIGPythonCallThreadPlan ( void *implementor, const char *method_name, @@ -507,15 +465,11 @@ LLDBSWIGPythonCallThreadPlan return false; } -SWIGEXPORT void * -LLDBSwigPythonCreateScriptedBreakpointResolver -( - const char *python_class_name, - const char *session_dictionary_name, +void *lldb_private::LLDBSwigPythonCreateScriptedBreakpointResolver( + const char *python_class_name, const char *session_dictionary_name, lldb_private::StructuredDataImpl *args_impl, - lldb::BreakpointSP &breakpoint_sp -) -{ + const lldb::BreakpointSP &breakpoint_sp) { + if (python_class_name == NULL || python_class_name[0] == '\0' || !session_dictionary_name) Py_RETURN_NONE; @@ -527,16 +481,11 @@ LLDBSwigPythonCreateScriptedBreakpointResolver if (!pfunc.IsAllocated()) return nullptr; - // FIXME: SBBreakpoint leaked here - lldb::SBBreakpoint *bkpt_value = new lldb::SBBreakpoint(breakpoint_sp); - - PythonObject bkpt_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*bkpt_value)); - // FIXME: SBStructuredData leaked here lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl); PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value)); - PythonObject result = pfunc(bkpt_arg, args_arg, dict); + PythonObject result = pfunc(ToSWIGWrapper(breakpoint_sp), args_arg, dict); // FIXME: At this point we should check that the class we found supports all the methods // that we need. 
@@ -552,8 +501,8 @@ LLDBSwigPythonCreateScriptedBreakpointResolver Py_RETURN_NONE; } -SWIGEXPORT unsigned int -LLDBSwigPythonCallBreakpointResolver +unsigned int +lldb_private::LLDBSwigPythonCallBreakpointResolver ( void *implementor, const char *method_name, @@ -603,8 +552,8 @@ LLDBSwigPythonCallBreakpointResolver return ret_val; } -SWIGEXPORT void * -LLDBSwigPythonCreateScriptedStopHook +void * +lldb_private::LLDBSwigPythonCreateScriptedStopHook ( lldb::TargetSP target_sp, const char *python_class_name, @@ -637,16 +586,11 @@ LLDBSwigPythonCreateScriptedStopHook return nullptr; } - // FIXME: SBTarget leaked here - lldb::SBTarget *target_val - = new lldb::SBTarget(target_sp); - PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*target_val)); - // FIXME: SBStructuredData leaked here lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl); PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value)); - PythonObject result = pfunc(target_arg, args_arg, dict); + PythonObject result = pfunc(ToSWIGWrapper(target_sp), args_arg, dict); if (result.IsAllocated()) { @@ -679,8 +623,8 @@ LLDBSwigPythonCreateScriptedStopHook Py_RETURN_NONE; } -SWIGEXPORT bool -LLDBSwigPythonStopHookCallHandleStop +bool +lldb_private::LLDBSwigPythonStopHookCallHandleStop ( void *implementor, lldb::ExecutionContextRefSP exc_ctx_sp, @@ -755,8 +699,8 @@ LLDBSwigPython_CallOptionalMember return result.release(); } -SWIGEXPORT size_t -LLDBSwigPython_CalculateNumChildren +size_t +lldb_private::LLDBSwigPython_CalculateNumChildren ( PyObject *implementor, uint32_t max @@ -793,8 +737,8 @@ LLDBSwigPython_CalculateNumChildren return ret_val; } -SWIGEXPORT PyObject* -LLDBSwigPython_GetChildAtIndex +PyObject* +lldb_private::LLDBSwigPython_GetChildAtIndex ( PyObject *implementor, uint32_t idx @@ -823,8 +767,8 @@ LLDBSwigPython_GetChildAtIndex return result.release(); } -SWIGEXPORT int -LLDBSwigPython_GetIndexOfChildWithName +int +lldb_private::LLDBSwigPython_GetIndexOfChildWithName ( PyObject *implementor, const char* child_name @@ -853,8 +797,8 @@ LLDBSwigPython_GetIndexOfChildWithName return UINT32_MAX; } -SWIGEXPORT bool -LLDBSwigPython_UpdateSynthProviderInstance +bool +lldb_private::LLDBSwigPython_UpdateSynthProviderInstance ( PyObject *implementor ) @@ -873,8 +817,8 @@ LLDBSwigPython_UpdateSynthProviderInstance return ret_val; } -SWIGEXPORT bool -LLDBSwigPython_MightHaveChildrenSynthProviderInstance +bool +lldb_private::LLDBSwigPython_MightHaveChildrenSynthProviderInstance ( PyObject *implementor ) @@ -893,8 +837,8 @@ LLDBSwigPython_MightHaveChildrenSynthProviderInstance return ret_val; } -SWIGEXPORT PyObject* -LLDBSwigPython_GetValueSynthProviderInstance +PyObject* +lldb_private::LLDBSwigPython_GetValueSynthProviderInstance ( PyObject *implementor ) @@ -921,8 +865,8 @@ LLDBSwigPython_GetValueSynthProviderInstance return ret_val; } -SWIGEXPORT void* -LLDBSWIGPython_CastPyObjectToSBData +void* +lldb_private::LLDBSWIGPython_CastPyObjectToSBData ( PyObject* data ) @@ -938,8 +882,8 @@ LLDBSWIGPython_CastPyObjectToSBData } -SWIGEXPORT void* -LLDBSWIGPython_CastPyObjectToSBError +void* +lldb_private::LLDBSWIGPython_CastPyObjectToSBError ( PyObject* data ) @@ -955,8 +899,8 @@ LLDBSWIGPython_CastPyObjectToSBError } -SWIGEXPORT void* -LLDBSWIGPython_CastPyObjectToSBValue +void* +lldb_private::LLDBSWIGPython_CastPyObjectToSBValue ( PyObject* data ) @@ -971,8 +915,8 @@ LLDBSWIGPython_CastPyObjectToSBValue return sb_ptr; } -SWIGEXPORT void* -LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo +void* 
+lldb_private::LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo ( PyObject* data ) @@ -987,8 +931,8 @@ LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo return sb_ptr; } -SWIGEXPORT bool -LLDBSwigPythonCallCommand +bool +lldb_private::LLDBSwigPythonCallCommand ( const char *python_function_name, const char *session_dictionary_name, @@ -1026,8 +970,8 @@ LLDBSwigPythonCallCommand return true; } -SWIGEXPORT bool -LLDBSwigPythonCallCommandObject +bool +lldb_private::LLDBSwigPythonCallCommandObject ( PyObject *implementor, lldb::DebuggerSP& debugger, @@ -1057,8 +1001,8 @@ LLDBSwigPythonCallCommandObject return true; } -SWIGEXPORT void* -LLDBSWIGPythonCreateOSPlugin +void* +lldb_private::LLDBSWIGPythonCreateOSPlugin ( const char *python_class_name, const char *session_dictionary_name, @@ -1076,13 +1020,7 @@ LLDBSWIGPythonCreateOSPlugin if (!pfunc.IsAllocated()) Py_RETURN_NONE; - // FIXME: This leaks the SBProcess object - lldb::SBProcess *process_sb = new lldb::SBProcess(process_sp); - PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*process_sb)); - if (!process_arg.IsAllocated()) - Py_RETURN_NONE; - - auto result = pfunc(process_arg); + auto result = pfunc(ToSWIGWrapper(process_sp)); if (result.IsAllocated()) return result.release(); @@ -1090,8 +1028,8 @@ LLDBSWIGPythonCreateOSPlugin Py_RETURN_NONE; } -SWIGEXPORT void* -LLDBSWIGPython_CreateFrameRecognizer +void* +lldb_private::LLDBSWIGPython_CreateFrameRecognizer ( const char *python_class_name, const char *session_dictionary_name @@ -1116,8 +1054,8 @@ LLDBSWIGPython_CreateFrameRecognizer Py_RETURN_NONE; } -SWIGEXPORT PyObject* -LLDBSwigPython_GetRecognizedArguments +PyObject* +lldb_private::LLDBSwigPython_GetRecognizedArguments ( PyObject *implementor, const lldb::StackFrameSP& frame_sp @@ -1134,8 +1072,8 @@ LLDBSwigPython_GetRecognizedArguments return result; } -SWIGEXPORT void* -LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb::TargetSP& target_sp) +void* +lldb_private::LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb::TargetSP& target_sp) { if (!module || !setting) Py_RETURN_NONE; @@ -1147,21 +1085,15 @@ LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb: if (!pfunc.IsAllocated()) Py_RETURN_NONE; - lldb::SBTarget target_sb(target_sp); - PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_sb)); - auto result = pfunc(target_arg, PythonString(setting)); + auto result = pfunc(ToSWIGWrapper(target_sp), PythonString(setting)); return result.release(); } -SWIGEXPORT bool -LLDBSWIGPythonRunScriptKeywordProcess -(const char* python_function_name, -const char* session_dictionary_name, -lldb::ProcessSP& process, -std::string& output) +bool lldb_private::LLDBSWIGPythonRunScriptKeywordProcess( + const char *python_function_name, const char *session_dictionary_name, + const lldb::ProcessSP &process, std::string &output) { -{ if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name) return false; @@ -1173,17 +1105,15 @@ std::string& output) if (!pfunc.IsAllocated()) return false; - lldb::SBProcess process_sb(process); - PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(process_sb)); - auto result = pfunc(process_arg, dict); + auto result = pfunc(ToSWIGWrapper(process), dict); output = result.Str().GetString().str(); return true; } -SWIGEXPORT bool -LLDBSWIGPythonRunScriptKeywordThread +bool +lldb_private::LLDBSWIGPythonRunScriptKeywordThread (const char* python_function_name, const 
char* session_dictionary_name, lldb::ThreadSP& thread, @@ -1210,14 +1140,10 @@ std::string& output) return true; } -SWIGEXPORT bool -LLDBSWIGPythonRunScriptKeywordTarget -(const char* python_function_name, -const char* session_dictionary_name, -lldb::TargetSP& target, -std::string& output) +bool lldb_private::LLDBSWIGPythonRunScriptKeywordTarget( + const char *python_function_name, const char *session_dictionary_name, + const lldb::TargetSP &target, std::string &output) { -{ if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name) return false; @@ -1229,17 +1155,15 @@ std::string& output) if (!pfunc.IsAllocated()) return false; - lldb::SBTarget target_sb(target); - PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_sb)); - auto result = pfunc(target_arg, dict); + auto result = pfunc(ToSWIGWrapper(target), dict); output = result.Str().GetString().str(); return true; } -SWIGEXPORT bool -LLDBSWIGPythonRunScriptKeywordFrame +bool +lldb_private::LLDBSWIGPythonRunScriptKeywordFrame (const char* python_function_name, const char* session_dictionary_name, lldb::StackFrameSP& frame, @@ -1266,14 +1190,10 @@ std::string& output) return true; } -SWIGEXPORT bool -LLDBSWIGPythonRunScriptKeywordValue -(const char* python_function_name, -const char* session_dictionary_name, -lldb::ValueObjectSP& value, -std::string& output) +bool lldb_private::LLDBSWIGPythonRunScriptKeywordValue( + const char *python_function_name, const char *session_dictionary_name, + const lldb::ValueObjectSP &value, std::string &output) { -{ if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name) return false; @@ -1285,17 +1205,15 @@ std::string& output) if (!pfunc.IsAllocated()) return false; - lldb::SBValue value_sb(value); - PythonObject value_arg(PyRefType::Owned, SBTypeToSWIGWrapper(value_sb)); - auto result = pfunc(value_arg, dict); + auto result = pfunc(ToSWIGWrapper(value), dict); output = result.Str().GetString().str(); return true; } -SWIGEXPORT bool -LLDBSwigPythonCallModuleInit +bool +lldb_private::LLDBSwigPythonCallModuleInit ( const char *python_module_name, const char *session_dictionary_name, @@ -1322,16 +1240,9 @@ LLDBSwigPythonCallModuleInit return true; } -%} - - -%runtime %{ -// Forward declaration to be inserted at the start of LLDBWrapPython.h -#include "lldb/API/SBDebugger.h" -#include "lldb/API/SBValue.h" -SWIGEXPORT lldb::ValueObjectSP -LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data) +lldb::ValueObjectSP +lldb_private::LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data) { lldb::ValueObjectSP valobj_sp; if (data) @@ -1342,22 +1253,8 @@ LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data) return valobj_sp; } -#ifdef __cplusplus -extern "C" { -#endif - -void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton); - -#ifdef __cplusplus -} -#endif -%} - -%wrapper %{ - - // For the LogOutputCallback functions -void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton) { +static void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton) { if (baton != Py_None) { SWIG_PYTHON_THREAD_BEGIN_BLOCK; PyObject *result = PyObject_CallFunction(reinterpret_cast<PyObject*>(baton), const_cast<char*>("s"), str); diff --git a/lldb/bindings/python/python.swig b/lldb/bindings/python/python.swig index 9dc4ab87a4bd..5dcbd68d8544 100644 --- a/lldb/bindings/python/python.swig +++ b/lldb/bindings/python/python.swig @@ -121,6 +121,7 @@ def lldb_iter(obj, getsize, getelem): %{ 
#include "../source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h" +#include "../source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h" #include "../bindings/python/python-swigsafecast.swig" using namespace lldb_private; using namespace lldb_private::python; diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h index 64081f79205d..1c771330cddc 100644 --- a/lldb/include/lldb/API/SBDebugger.h +++ b/lldb/include/lldb/API/SBDebugger.h @@ -126,6 +126,8 @@ public: FILE *GetErrorFileHandle(); + SBError SetInputString(const char *data); + SBError SetInputFile(SBFile file); SBError SetOutputFile(SBFile file); diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h index 5a6908f040b1..abd9ebf07407 100644 --- a/lldb/include/lldb/API/SBTarget.h +++ b/lldb/include/lldb/API/SBTarget.h @@ -336,6 +336,11 @@ public: /// unit from the Architecture's code bus uint32_t GetCodeByteSize(); + /// Gets the target.max-children-count value + /// It should be used to limit the number of + /// children of large data structures to be displayed. + uint32_t GetMaximumNumberOfChildrenToDisplay() const; + /// Set the base load address for a module section. /// /// \param[in] section diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h index 69be02545b35..a8578abec6b7 100644 --- a/lldb/include/lldb/API/SBValue.h +++ b/lldb/include/lldb/API/SBValue.h @@ -246,6 +246,12 @@ public: bool SetData(lldb::SBData &data, lldb::SBError &error); + /// Creates a copy of the SBValue with a new name and setting the current + /// SBValue as its parent. It should be used when we want to change the + /// name of a SBValue without modifying the actual SBValue itself + /// (e.g. sythetic child provider). + lldb::SBValue Clone(const char *new_name); + lldb::SBDeclaration GetDeclaration(); /// Find out if a SBValue might have children. diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h index f0849c9ac950..1ab21bec54c9 100644 --- a/lldb/include/lldb/Core/Debugger.h +++ b/lldb/include/lldb/Core/Debugger.h @@ -176,7 +176,13 @@ public: repro::DataRecorder *GetInputRecorder(); - void SetInputFile(lldb::FileSP file, repro::DataRecorder *recorder = nullptr); + Status SetInputString(const char *data); + + // This method will setup data recorder if reproducer enabled. + // On reply mode this method should take instructions from reproducer file. 
+ Status SetInputFile(lldb::FileSP file); + + void SetInputFile(lldb::FileSP file, repro::DataRecorder *recorder); void SetOutputFile(lldb::FileSP file); diff --git a/lldb/include/lldb/Interpreter/OptionGroupFormat.h b/lldb/include/lldb/Interpreter/OptionGroupFormat.h index 2d445b8a6c20..551688b0d25f 100644 --- a/lldb/include/lldb/Interpreter/OptionGroupFormat.h +++ b/lldb/include/lldb/Interpreter/OptionGroupFormat.h @@ -16,6 +16,9 @@ namespace lldb_private { +typedef std::vector<std::tuple<lldb::CommandArgumentType, const char *>> + OptionGroupFormatUsageTextVector; + // OptionGroupFormat class OptionGroupFormat : public OptionGroup { @@ -30,7 +33,10 @@ public: uint64_t default_byte_size = UINT64_MAX, // Pass UINT64_MAX to disable the "--size" option uint64_t default_count = - UINT64_MAX); // Pass UINT64_MAX to disable the "--count" option + UINT64_MAX, // Pass UINT64_MAX to disable the "--count" option + OptionGroupFormatUsageTextVector usage_text_vector = {} + // Use to override default option usage text with the command specific one + ); ~OptionGroupFormat() override = default; @@ -73,6 +79,7 @@ protected: char m_prev_gdb_format; char m_prev_gdb_size; bool m_has_gdb_format; + OptionDefinition m_option_definitions[4]; }; } // namespace lldb_private diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index 4ccd7f92064d..0a8b38b2c642 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -19,6 +19,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/UUID.h" #include "lldb/lldb-private.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/VersionTuple.h" namespace lldb_private { @@ -322,12 +323,26 @@ public: /// Gets the symbol table for the currently selected architecture (and /// object for archives). /// - /// Symbol table parsing can be deferred by ObjectFile instances until this - /// accessor is called the first time. + /// This function will manage when ParseSymtab(...) is called to actually do + /// the symbol table parsing in each plug-in. This function will take care of + /// taking all the necessary locks and finalizing the symbol table when the + /// symbol table does get parsed. /// /// \return /// The symbol table for this object file. - virtual Symtab *GetSymtab() = 0; + Symtab *GetSymtab(); + + /// Parse the symbol table into the provides symbol table object. + /// + /// Symbol table parsing will be done once when this function is called by + /// each object file plugin. All of the necessary locks will already be + /// acquired before this function is called and the symbol table object to + /// populate is supplied as an argument and doesn't need to be created by + /// each plug-in. + /// + /// \param + /// The symbol table to populate. + virtual void ParseSymtab(Symtab &symtab) = 0; /// Perform relocations on the section if necessary. /// @@ -708,7 +723,12 @@ protected: const lldb::addr_t m_memory_addr; std::unique_ptr<lldb_private::SectionList> m_sections_up; std::unique_ptr<lldb_private::Symtab> m_symtab_up; - uint32_t m_synthetic_symbol_idx; + /// We need a llvm::once_flag that we can use to avoid locking the module + /// lock and deadlocking LLDB. See comments in ObjectFile::GetSymtab() for + /// the full details. We also need to be able to clear the symbol table, so we + /// need to use a std::unique_ptr to a llvm::once_flag so if we clear the + /// symbol table, we can have a new once flag to use when it is created again. 
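The m_symtab_once_up member declared just below backs that scheme. A rough sketch of the call-once pattern the comment describes (the actual ObjectFile::GetSymtab() body lives in ObjectFile.cpp and is not part of this excerpt, so treat this as an illustration only):

    Symtab *ObjectFile::GetSymtab() {
      ModuleSP module_sp(GetModule());
      if (!module_sp)
        return nullptr;
      // No module lock is taken here; the once flag guarantees a single
      // ParseSymtab() call even when multiple threads race to this point.
      llvm::call_once(*m_symtab_once_up, [&]() {
        Symtab *symtab = new Symtab(this);
        std::lock_guard<std::recursive_mutex> guard(symtab->GetMutex());
        m_symtab_up.reset(symtab);
        ParseSymtab(*m_symtab_up); // plug-in specific parsing only
        m_symtab_up->Finalize();   // sort and shrink once, in one place
      });
      return m_symtab_up.get();
    }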
+ std::unique_ptr<llvm::once_flag> m_symtab_once_up; /// Sets the architecture for a module. At present the architecture can /// only be set if it is invalid. It is not allowed to switch from one diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h index e1ad0dfd2eb8..e5d21c1bf4b3 100644 --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -119,20 +119,13 @@ public: lldb::addr_t file_addr, std::function<bool(Symbol *)> const &callback); void FindFunctionSymbols(ConstString name, uint32_t name_type_mask, SymbolContextList &sc_list); - void CalculateSymbolSizes(); void SortSymbolIndexesByValue(std::vector<uint32_t> &indexes, bool remove_duplicates) const; static void DumpSymbolHeader(Stream *s); - void Finalize() { - // Shrink to fit the symbols so we don't waste memory - if (m_symbols.capacity() > m_symbols.size()) { - collection new_symbols(m_symbols.begin(), m_symbols.end()); - m_symbols.swap(new_symbols); - } - } + void Finalize(); void AppendSymbolNamesToMap(const IndexCollection &indexes, bool add_demangled, bool add_mangled, diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 956b29e45dba..26127359a322 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -310,25 +310,7 @@ public: /// Get the platform's supported architectures in the order in which they /// should be searched. - /// - /// \param[in] idx - /// A zero based architecture index - /// - /// \param[out] arch - /// A copy of the architecture at index if the return value is - /// \b true. - /// - /// \return - /// \b true if \a arch was filled in and is valid, \b false - /// otherwise. - virtual bool GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch); - - /// Get the platform's supported architectures in the order in which they - /// should be searched. - /// NB: This implementation is mutually recursive with - /// GetSupportedArchitectureAtIndex. Subclasses should implement one of them. - virtual std::vector<ArchSpec> GetSupportedArchitectures(); + virtual std::vector<ArchSpec> GetSupportedArchitectures() = 0; virtual size_t GetSoftwareBreakpointTrapOpcode(Target &target, BreakpointSite *bp_site); @@ -971,10 +953,6 @@ private: bool GetCachedSharedModule(const ModuleSpec &module_spec, lldb::ModuleSP &module_sp, bool *did_create_ptr); - Status LoadCachedExecutable(const ModuleSpec &module_spec, - lldb::ModuleSP &module_sp, - const FileSpecList *module_search_paths_ptr); - FileSpec GetModuleCacheRoot(); }; diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 4627502abd25..e27cb8cbf2aa 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1762,7 +1762,7 @@ public: /// /// If load_addr is within the address space the process has mapped /// range_info will be filled in with the start and end of that range as - /// well as the permissions for that range and range_info. GetMapped will + /// well as the permissions for that range and range_info.GetMapped will /// return true. /// /// If load_addr is outside any mapped region then range_info will have its @@ -1771,21 +1771,23 @@ public: /// there are no valid mapped ranges between load_addr and the end of the /// process address space. /// - /// GetMemoryRegionInfo calls DoGetMemoryRegionInfo. Override that function in - /// process subclasses. + /// GetMemoryRegionInfo will only return an error if it is unimplemented for + /// the current process. 
/// /// \param[in] load_addr - /// The load address to query the range_info for. May include non - /// address bits, these will be removed by the the ABI plugin if there is - /// one. + /// The load address to query the range_info for. /// /// \param[out] range_info /// An range_info value containing the details of the range. /// /// \return /// An error value. - Status GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info); + virtual Status GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) { + Status error; + error.SetErrorString("Process::GetMemoryRegionInfo() not supported"); + return error; + } /// Obtain all the mapped memory regions within this process. /// @@ -2605,26 +2607,6 @@ protected: virtual size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size, Status &error) = 0; - /// DoGetMemoryRegionInfo is called by GetMemoryRegionInfo after it has - /// removed non address bits from load_addr. Override this method in - /// subclasses of Process. - /// - /// See GetMemoryRegionInfo for details of the logic. - /// - /// \param[in] load_addr - /// The load address to query the range_info for. (non address bits - /// removed) - /// - /// \param[out] range_info - /// An range_info value containing the details of the range. - /// - /// \return - /// An error value. - virtual Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) { - return Status("Process::DoGetMemoryRegionInfo() not supported"); - } - lldb::StateType GetPrivateState(); /// The "private" side of resuming a process. This doesn't alter the state diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp index 4bb23c3e705c..844b91de4cd0 100644 --- a/lldb/source/API/SBDebugger.cpp +++ b/lldb/source/API/SBDebugger.cpp @@ -327,12 +327,32 @@ void SBDebugger::SkipAppInitFiles(bool b) { void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) { LLDB_RECORD_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool), fh, transfer_ownership); - SetInputFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership)); + if (m_opaque_sp) + m_opaque_sp->SetInputFile( + (FileSP)std::make_shared<NativeFile>(fh, transfer_ownership)); } -SBError SBDebugger::SetInputFile(FileSP file_sp) { - LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp); - return LLDB_RECORD_RESULT(SetInputFile(SBFile(file_sp))); +SBError SBDebugger::SetInputString(const char *data) { + LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputString, (const char *), data); + SBError sb_error; + if (data == nullptr) { + sb_error.SetErrorString("String data is null"); + return LLDB_RECORD_RESULT(sb_error); + } + + size_t size = strlen(data); + if (size == 0) { + sb_error.SetErrorString("String data is empty"); + return LLDB_RECORD_RESULT(sb_error); + } + + if (!m_opaque_sp) { + sb_error.SetErrorString("invalid debugger"); + return LLDB_RECORD_RESULT(sb_error); + } + + sb_error.SetError(m_opaque_sp->SetInputString(data)); + return LLDB_RECORD_RESULT(sb_error); } // Shouldn't really be settable after initialization as this could cause lots @@ -346,36 +366,15 @@ SBError SBDebugger::SetInputFile(SBFile file) { error.ref().SetErrorString("invalid debugger"); return LLDB_RECORD_RESULT(error); } - - repro::DataRecorder *recorder = nullptr; - if (repro::Generator *g = repro::Reproducer::Instance().GetGenerator()) - recorder = g->GetOrCreate<repro::CommandProvider>().GetNewRecorder(); - - FileSP file_sp = file.m_opaque_sp; - - static 
std::unique_ptr<repro::MultiLoader<repro::CommandProvider>> loader = - repro::MultiLoader<repro::CommandProvider>::Create( - repro::Reproducer::Instance().GetLoader()); - if (loader) { - llvm::Optional<std::string> nextfile = loader->GetNextFile(); - FILE *fh = nextfile ? FileSystem::Instance().Fopen(nextfile->c_str(), "r") - : nullptr; - // FIXME Jonas Devlieghere: shouldn't this error be propagated out to the - // reproducer somehow if fh is NULL? - if (fh) { - file_sp = std::make_shared<NativeFile>(fh, true); - } - } - - if (!file_sp || !file_sp->IsValid()) { - error.ref().SetErrorString("invalid file"); - return LLDB_RECORD_RESULT(error); - } - - m_opaque_sp->SetInputFile(file_sp, recorder); + error.SetError(m_opaque_sp->SetInputFile(file.m_opaque_sp)); return LLDB_RECORD_RESULT(error); } +SBError SBDebugger::SetInputFile(FileSP file_sp) { + LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp); + return LLDB_RECORD_RESULT(SetInputFile(SBFile(file_sp))); +} + SBError SBDebugger::SetOutputFile(FileSP file_sp) { LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp); return LLDB_RECORD_RESULT(SetOutputFile(SBFile(file_sp))); @@ -1771,6 +1770,7 @@ template <> void RegisterMethods<SBDebugger>(Registry &R) { LLDB_REGISTER_METHOD(bool, SBDebugger, GetAsync, ()); LLDB_REGISTER_METHOD(void, SBDebugger, SkipLLDBInitFiles, (bool)); LLDB_REGISTER_METHOD(void, SBDebugger, SkipAppInitFiles, (bool)); + LLDB_REGISTER_METHOD(SBError, SBDebugger, SetInputString, (const char *)); LLDB_REGISTER_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool)); LLDB_REGISTER_METHOD(FILE *, SBDebugger, GetInputFileHandle, ()); LLDB_REGISTER_METHOD(FILE *, SBDebugger, GetOutputFileHandle, ()); diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 98158f457a04..dc79c77fee9e 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -1745,6 +1745,16 @@ uint32_t SBTarget::GetCodeByteSize() { return 0; } +uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const { + LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay); + + TargetSP target_sp(GetSP()); + if(target_sp){ + return target_sp->GetMaximumNumberOfChildrenToDisplay(); + } + return 0; +} + uint32_t SBTarget::GetAddressByteSize() { LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetAddressByteSize); @@ -2679,6 +2689,7 @@ void RegisterMethods<SBTarget>(Registry &R) { LLDB_REGISTER_METHOD(const char *, SBTarget, GetTriple, ()); LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetDataByteSize, ()); LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetCodeByteSize, ()); + LLDB_REGISTER_METHOD_CONST(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay,()); LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetAddressByteSize, ()); LLDB_REGISTER_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndex, (uint32_t)); diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp index 9faee102c5e3..e3325b8d36fa 100644 --- a/lldb/source/API/SBValue.cpp +++ b/lldb/source/API/SBValue.cpp @@ -1431,6 +1431,18 @@ bool SBValue::SetData(lldb::SBData &data, SBError &error) { return ret; } +lldb::SBValue SBValue::Clone(const char *new_name) { + LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Clone, (const char *), new_name); + + ValueLocker locker; + lldb::ValueObjectSP value_sp(GetSP(locker)); + + if (value_sp) + return lldb::SBValue(value_sp->Clone(ConstString(new_name))); + else + return lldb::SBValue(); +} + lldb::SBDeclaration SBValue::GetDeclaration() { 
LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDeclaration, SBValue, GetDeclaration); @@ -1656,6 +1668,7 @@ void RegisterMethods<SBValue>(Registry &R) { LLDB_REGISTER_METHOD(lldb::SBData, SBValue, GetData, ()); LLDB_REGISTER_METHOD(bool, SBValue, SetData, (lldb::SBData &, lldb::SBError &)); + LLDB_REGISTER_METHOD(lldb::SBValue, SBValue, Clone, (const char *)); LLDB_REGISTER_METHOD(lldb::SBDeclaration, SBValue, GetDeclaration, ()); LLDB_REGISTER_METHOD(lldb::SBWatchpoint, SBValue, Watch, (bool, bool, bool, lldb::SBError &)); diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp index f27d4bd7e4b2..094ce6f8558f 100644 --- a/lldb/source/Commands/CommandObjectMemory.cpp +++ b/lldb/source/Commands/CommandObjectMemory.cpp @@ -1222,7 +1222,15 @@ public: interpreter, "memory write", "Write to the memory of the current target process.", nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched), - m_option_group(), m_format_options(eFormatBytes, 1, UINT64_MAX), + m_option_group(), + m_format_options( + eFormatBytes, 1, UINT64_MAX, + {std::make_tuple( + eArgTypeFormat, + "The format to use for each of the value to be written."), + std::make_tuple( + eArgTypeByteSize, + "The size in bytes to write from input file or each value.")}), m_memory_options() { CommandArgumentEntry arg1; CommandArgumentEntry arg2; @@ -1240,6 +1248,7 @@ public: // Define the first (and only) variant of this arg. value_arg.arg_type = eArgTypeValue; value_arg.arg_repetition = eArgRepeatPlus; + value_arg.arg_opt_set_association = LLDB_OPT_SET_1; // There is only one variant this argument could be; put it into the // argument entry. @@ -1278,6 +1287,12 @@ protected: m_cmd_name.c_str()); return false; } + if (argc > 1) { + result.AppendErrorWithFormat( + "%s takes only a destination address when writing file contents.\n", + m_cmd_name.c_str()); + return false; + } } else if (argc < 2) { result.AppendErrorWithFormat( "%s takes a destination address and at least one value.\n", diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp index 32dcfb1ce17b..ae454fae3322 100644 --- a/lldb/source/Core/Debugger.cpp +++ b/lldb/source/Core/Debugger.cpp @@ -45,6 +45,7 @@ #include "lldb/Utility/Listener.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Reproducer.h" +#include "lldb/Utility/ReproducerProvider.h" #include "lldb/Utility/State.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamCallback.h" @@ -75,6 +76,14 @@ #include <string> #include <system_error> +// Includes for pipe() +#if defined(_WIN32) +#include <fcntl.h> +#include <io.h> +#else +#include <unistd.h> +#endif + namespace lldb_private { class Address; } @@ -810,6 +819,86 @@ void Debugger::SetAsyncExecution(bool async_execution) { repro::DataRecorder *Debugger::GetInputRecorder() { return m_input_recorder; } +static inline int OpenPipe(int fds[2], std::size_t size) { +#ifdef _WIN32 + return _pipe(fds, size, O_BINARY); +#else + (void)size; + return pipe(fds); +#endif +} + +Status Debugger::SetInputString(const char *data) { + Status result; + enum PIPES { READ, WRITE }; // Indexes for the read and write fds + int fds[2] = {-1, -1}; + + if (data == nullptr) { + result.SetErrorString("String data is null"); + return result; + } + + size_t size = strlen(data); + if (size == 0) { + result.SetErrorString("String data is empty"); + return result; + } + + if (OpenPipe(fds, size) != 0) { + result.SetErrorString( + "can't create pipe file descriptors for LLDB commands"); + return result; + } + + 
write(fds[WRITE], data, size); + // Close the write end of the pipe, so that the command interpreter will exit + // when it consumes all the data. + llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]); + + // Open the read file descriptor as a FILE * that we can return as an input + // handle. + FILE *commands_file = fdopen(fds[READ], "rb"); + if (commands_file == nullptr) { + result.SetErrorStringWithFormat("fdopen(%i, \"rb\") failed (errno = %i) " + "when trying to open LLDB commands pipe", + fds[READ], errno); + llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]); + return result; + } + + return SetInputFile( + (FileSP)std::make_shared<NativeFile>(commands_file, true)); +} + +Status Debugger::SetInputFile(FileSP file_sp) { + Status error; + repro::DataRecorder *recorder = nullptr; + if (repro::Generator *g = repro::Reproducer::Instance().GetGenerator()) + recorder = g->GetOrCreate<repro::CommandProvider>().GetNewRecorder(); + + static std::unique_ptr<repro::MultiLoader<repro::CommandProvider>> loader = + repro::MultiLoader<repro::CommandProvider>::Create( + repro::Reproducer::Instance().GetLoader()); + if (loader) { + llvm::Optional<std::string> nextfile = loader->GetNextFile(); + FILE *fh = nextfile ? FileSystem::Instance().Fopen(nextfile->c_str(), "r") + : nullptr; + // FIXME Jonas Devlieghere: shouldn't this error be propagated out to the + // reproducer somehow if fh is NULL? + if (fh) { + file_sp = std::make_shared<NativeFile>(fh, true); + } + } + + if (!file_sp || !file_sp->IsValid()) { + error.SetErrorString("invalid file"); + return error; + } + + SetInputFile(file_sp, recorder); + return error; +} + void Debugger::SetInputFile(FileSP file_sp, repro::DataRecorder *recorder) { assert(file_sp && file_sp->IsValid()); m_input_recorder = recorder; diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index bd0a667171a5..cbecbb9aa5fe 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1379,12 +1379,15 @@ void Module::PreloadSymbols() { if (!sym_file) return; - // Prime the symbol file first, since it adds symbols to the symbol table. - sym_file->PreloadSymbols(); - - // Now we can prime the symbol table. + // Load the object file symbol table and any symbols from the SymbolFile that + // get appended using SymbolFile::AddSymbols(...). if (Symtab *symtab = sym_file->GetSymtab()) symtab->PreloadSymbols(); + + // Now let the symbol file preload its data and the symbol table will be + // available without needing to take the module lock. + sym_file->PreloadSymbols(); + } void Module::SetSymbolFileFileSpec(const FileSpec &file) { diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp index 64b23d04abea..dcae27ff5479 100644 --- a/lldb/source/Interpreter/CommandObject.cpp +++ b/lldb/source/Interpreter/CommandObject.cpp @@ -454,6 +454,9 @@ void CommandObject::GetFormattedCommandArguments(Stream &str, opt_set_mask == LLDB_OPT_SET_ALL ? m_arguments[i] : OptSetFiltered(opt_set_mask, m_arguments[i]); + // This argument is not associated with the current option set, so skip it. 
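The pattern used by the "memory write" constructor above, together with the OptionGroupFormat changes just below, lets a command replace the stock --format/--size help strings. A condensed sketch, with the usage strings illustrative:

    OptionGroupFormat format_options(
        eFormatBytes, /*default_byte_size=*/1, /*default_count=*/UINT64_MAX,
        {std::make_tuple(eArgTypeFormat,
                         "The format to use for each value to be written."),
         std::make_tuple(eArgTypeByteSize,
                         "The size in bytes of each value written.")});
    // Only eArgTypeFormat and eArgTypeByteSize are handled by the override
    // switch in the new constructor; any other argument type hits
    // llvm_unreachable().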
+ if (arg_entry.empty()) + continue; int num_alternatives = arg_entry.size(); if ((num_alternatives == 2) && IsPairType(arg_entry[0].arg_repetition)) { diff --git a/lldb/source/Interpreter/OptionGroupFormat.cpp b/lldb/source/Interpreter/OptionGroupFormat.cpp index 1cc5e70282c1..a2ca9ff39818 100644 --- a/lldb/source/Interpreter/OptionGroupFormat.cpp +++ b/lldb/source/Interpreter/OptionGroupFormat.cpp @@ -16,15 +16,7 @@ using namespace lldb; using namespace lldb_private; -OptionGroupFormat::OptionGroupFormat(lldb::Format default_format, - uint64_t default_byte_size, - uint64_t default_count) - : m_format(default_format, default_format), - m_byte_size(default_byte_size, default_byte_size), - m_count(default_count, default_count), m_prev_gdb_format('x'), - m_prev_gdb_size('w') {} - -static constexpr OptionDefinition g_option_table[] = { +static constexpr OptionDefinition g_default_option_definitions[] = { {LLDB_OPT_SET_1, false, "format", 'f', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeFormat, "Specify a format to be used for display."}, @@ -39,8 +31,34 @@ static constexpr OptionDefinition g_option_table[] = { "The number of total items to display."}, }; +OptionGroupFormat::OptionGroupFormat( + lldb::Format default_format, uint64_t default_byte_size, + uint64_t default_count, OptionGroupFormatUsageTextVector usage_text_vector) + : m_format(default_format, default_format), + m_byte_size(default_byte_size, default_byte_size), + m_count(default_count, default_count), m_prev_gdb_format('x'), + m_prev_gdb_size('w') { + // Copy the default option definitions. + std::copy(std::begin(g_default_option_definitions), + std::end(g_default_option_definitions), + std::begin(m_option_definitions)); + + for (auto usage_text_tuple : usage_text_vector) { + switch (std::get<0>(usage_text_tuple)) { + case eArgTypeFormat: + m_option_definitions[0].usage_text = std::get<1>(usage_text_tuple); + break; + case eArgTypeByteSize: + m_option_definitions[2].usage_text = std::get<1>(usage_text_tuple); + break; + default: + llvm_unreachable("Unimplemented option"); + } + } +} + llvm::ArrayRef<OptionDefinition> OptionGroupFormat::GetDefinitions() { - auto result = llvm::makeArrayRef(g_option_table); + auto result = llvm::makeArrayRef(m_option_definitions); if (m_byte_size.GetDefaultValue() < UINT64_MAX) { if (m_count.GetDefaultValue() < UINT64_MAX) return result; @@ -54,7 +72,7 @@ Status OptionGroupFormat::SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg, ExecutionContext *execution_context) { Status error; - const int short_option = g_option_table[option_idx].short_option; + const int short_option = m_option_definitions[option_idx].short_option; switch (short_option) { case 'f': diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp index 50e9f7827838..1437d7b58293 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp @@ -516,7 +516,7 @@ CppModuleConfiguration GetModuleConfig(lldb::LanguageType language, // Try to create a configuration from the files. If there is no valid // configuration possible with the files, this just returns an invalid // configuration. 
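For a concrete sense of what the new triple parameter buys, the target-specific include paths derived by getTargetIncludePaths() (defined in the next hunk) for a common Linux triple would be the following; the paths are examples and need not exist on every system:

    // triple = "x86_64-unknown-linux-gnu"
    //   /usr/include/x86_64-unknown-linux-gnu   (full triple string)
    //   /usr/include/x86_64-linux-gnu           (arch + "-" + OS/environment)
    // If one of these prefixes appears in a support file's directory,
    // analyzeFile() records it as m_c_target_inc and it is appended to the
    // module include directories.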
- return CppModuleConfiguration(files); + return CppModuleConfiguration(files, target->GetArchitecture().GetTriple()); } bool ClangUserExpression::PrepareForParsing( diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp index ffab16b1682b..befb1f125406 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp @@ -10,6 +10,7 @@ #include "ClangHost.h" #include "lldb/Host/FileSystem.h" +#include "llvm/ADT/Triple.h" using namespace lldb_private; @@ -30,7 +31,35 @@ bool CppModuleConfiguration::SetOncePath::TrySet(llvm::StringRef path) { return false; } -bool CppModuleConfiguration::analyzeFile(const FileSpec &f) { +static llvm::SmallVector<std::string, 2> +getTargetIncludePaths(const llvm::Triple &triple) { + llvm::SmallVector<std::string, 2> paths; + if (!triple.str().empty()) { + paths.push_back("/usr/include/" + triple.str()); + if (!triple.getArchName().empty() || + triple.getOSAndEnvironmentName().empty()) + paths.push_back(("/usr/include/" + triple.getArchName() + "-" + + triple.getOSAndEnvironmentName()) + .str()); + } + return paths; +} + +/// Returns the include path matching the given pattern for the given file +/// path (or None if the path doesn't match the pattern). +static llvm::Optional<llvm::StringRef> +guessIncludePath(llvm::StringRef path_to_file, llvm::StringRef pattern) { + if (pattern.empty()) + return llvm::NoneType(); + size_t pos = path_to_file.find(pattern); + if (pos == llvm::StringRef::npos) + return llvm::NoneType(); + + return path_to_file.substr(0, pos + pattern.size()); +} + +bool CppModuleConfiguration::analyzeFile(const FileSpec &f, + const llvm::Triple &triple) { using namespace llvm::sys::path; // Convert to slashes to make following operations simpler. std::string dir_buffer = convert_to_slash(f.GetDirectory().GetStringRef()); @@ -43,15 +72,25 @@ bool CppModuleConfiguration::analyzeFile(const FileSpec &f) { // need to be specified in the header search. if (libcpp_regex.match(f.GetPath()) && parent_path(posix_dir, Style::posix).endswith("c++")) { - return m_std_inc.TrySet(posix_dir); + if (!m_std_inc.TrySet(posix_dir)) + return false; + if (triple.str().empty()) + return true; + + posix_dir.consume_back("c++/v1"); + // Check if this is a target-specific libc++ include directory. + return m_std_target_inc.TrySet( + (posix_dir + triple.str() + "/c++/v1").str()); } - // Check for /usr/include. On Linux this might be /usr/include/bits, so - // we should remove that '/bits' suffix to get the actual include directory. - if (posix_dir.endswith("/usr/include/bits")) - posix_dir.consume_back("/bits"); - if (posix_dir.endswith("/usr/include")) - return m_c_inc.TrySet(posix_dir); + llvm::Optional<llvm::StringRef> inc_path; + // Target specific paths contains /usr/include, so we check them first + for (auto &path : getTargetIncludePaths(triple)) { + if ((inc_path = guessIncludePath(posix_dir, path))) + return m_c_target_inc.TrySet(*inc_path); + } + if ((inc_path = guessIncludePath(posix_dir, "/usr/include"))) + return m_c_inc.TrySet(*inc_path); // File wasn't interesting, continue analyzing. 
return true; @@ -92,11 +131,11 @@ bool CppModuleConfiguration::hasValidConfig() { } CppModuleConfiguration::CppModuleConfiguration( - const FileSpecList &support_files) { + const FileSpecList &support_files, const llvm::Triple &triple) { // Analyze all files we were given to build the configuration. bool error = !llvm::all_of(support_files, std::bind(&CppModuleConfiguration::analyzeFile, - this, std::placeholders::_1)); + this, std::placeholders::_1, triple)); // If we have a valid configuration at this point, set the // include directories and module list that should be used. if (!error && hasValidConfig()) { @@ -109,6 +148,10 @@ CppModuleConfiguration::CppModuleConfiguration( // This order matches the way Clang orders these directories. m_include_dirs = {m_std_inc.Get().str(), m_resource_inc, m_c_inc.Get().str()}; + if (m_c_target_inc.Valid()) + m_include_dirs.push_back(m_c_target_inc.Get().str()); + if (m_std_target_inc.Valid()) + m_include_dirs.push_back(m_std_target_inc.Get().str()); m_imported_modules = {"std"}; } } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h index 907db5d625dc..5db8abbdbdf3 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h @@ -42,8 +42,15 @@ class CppModuleConfiguration { /// If valid, the include path used for the std module. SetOncePath m_std_inc; + /// If valid, the per-target include path used for the std module. + /// This is an optional path only required on some systems. + SetOncePath m_std_target_inc; /// If valid, the include path to the C library (e.g. /usr/include). SetOncePath m_c_inc; + /// If valid, the include path to target-specific C library files + /// (e.g. /usr/include/x86_64-linux-gnu). + /// This is an optional path only required on some systems. + SetOncePath m_c_target_inc; /// The Clang resource include path for this configuration. std::string m_resource_inc; @@ -53,11 +60,13 @@ class CppModuleConfiguration { /// Analyze a given source file to build the current configuration. /// Returns false iff there was a fatal error that makes analyzing any /// further files pointless as the configuration is now invalid. - bool analyzeFile(const FileSpec &f); + bool analyzeFile(const FileSpec &f, const llvm::Triple &triple); public: /// Creates a configuration by analyzing the given list of used source files. - explicit CppModuleConfiguration(const FileSpecList &support_files); + /// The triple (if valid) is used to search for target-specific include paths. + explicit CppModuleConfiguration(const FileSpecList &support_files, + const llvm::Triple &triple); /// Creates an empty and invalid configuration. 
CppModuleConfiguration() = default; diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp index 83e8e52b86f2..f1925990e94a 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -914,11 +914,21 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { stl_deref_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider"))); cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add( + RegularExpression("^std::optional<.+>(( )?&)?$"), + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_synth_flags, + "lldb.formatters.cpp.gnu_libstdcpp.StdOptionalSynthProvider"))); + cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add( RegularExpression("^std::multiset<.+> >(( )?&)?$"), SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_deref_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider"))); cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add( + RegularExpression("^std::unordered_(multi)?(map|set)<.+> >$"), + SyntheticChildrenSP(new ScriptedSyntheticChildren( + stl_deref_flags, + "lldb.formatters.cpp.gnu_libstdcpp.StdUnorderedMapSynthProvider"))); + cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add( RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"), SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, @@ -928,9 +938,15 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { SyntheticChildrenSP(new ScriptedSyntheticChildren( stl_synth_flags, "lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider"))); + stl_summary_flags.SetDontShowChildren(false); stl_summary_flags.SetSkipPointers(false); cpp_category_sp->GetRegexTypeSummariesContainer()->Add( + RegularExpression("^std::optional<.+>(( )?&)?$"), + TypeSummaryImplSP(new ScriptSummaryFormat( + stl_summary_flags, + "lldb.formatters.cpp.gnu_libstdcpp.StdOptionalSummaryProvider"))); + cpp_category_sp->GetRegexTypeSummariesContainer()->Add( RegularExpression("^std::bitset<.+>(( )?&)?$"), TypeSummaryImplSP( new StringSummaryFormat(stl_summary_flags, "size=${svar%#}"))); @@ -955,13 +971,17 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) { TypeSummaryImplSP( new StringSummaryFormat(stl_summary_flags, "size=${svar%#}"))); cpp_category_sp->GetRegexTypeSummariesContainer()->Add( + RegularExpression("^std::unordered_(multi)?(map|set)<.+> >$"), + TypeSummaryImplSP( + new StringSummaryFormat(stl_summary_flags, "size=${svar%#}"))); + cpp_category_sp->GetRegexTypeSummariesContainer()->Add( RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"), TypeSummaryImplSP( new StringSummaryFormat(stl_summary_flags, "size=${svar%#}"))); cpp_category_sp->GetRegexTypeSummariesContainer()->Add( RegularExpression("^std::(__cxx11::)?forward_list<.+>(( )?&)?$"), TypeSummaryImplSP( - new StringSummaryFormat(stl_summary_flags, "size=${svar%#}"))); + new ScriptSummaryFormat(stl_summary_flags, "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider"))); AddCXXSynthetic( cpp_category_sp, diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp index fc8255983436..c8063915b178 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp @@ -81,12 +81,11 @@ bool GenericBitsetFrontEnd::Update() { TargetSP target_sp = 
m_backend.GetTargetSP(); if (!target_sp) return false; - size_t capping_size = target_sp->GetMaximumNumberOfChildrenToDisplay(); size_t size = 0; if (auto arg = m_backend.GetCompilerType().GetIntegralTemplateArgument(0)) - size = arg->value.getLimitedValue(capping_size); + size = arg->value.getLimitedValue(); m_elements.assign(size, ValueObjectSP()); m_first = m_backend.GetChildMemberWithName(GetDataContainerMemberName(), true) diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp index c0c819632851..c1b40ba65e7d 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp @@ -45,7 +45,7 @@ bool OptionalFrontEnd::Update() { // __engaged_ is a bool flag and is true if the optional contains a value. // Converting it to unsigned gives us a size of 1 if it contains a value // and 0 if not. - m_has_value = engaged_sp->GetValueAsUnsigned(0) == 1; + m_has_value = engaged_sp->GetValueAsUnsigned(0) != 0; return false; } diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp index 3a441973fc73..57c5ba87c397 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp +++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp @@ -62,9 +62,7 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: size_t lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: CalculateNumChildren() { - if (m_num_elements != UINT32_MAX) - return m_num_elements; - return 0; + return m_num_elements; } lldb::ValueObjectSP lldb_private::formatters:: @@ -160,7 +158,7 @@ lldb::ValueObjectSP lldb_private::formatters:: bool lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: Update() { - m_num_elements = UINT32_MAX; + m_num_elements = 0; m_next_element = nullptr; m_elements_cache.clear(); ValueObjectSP table_sp = @@ -195,8 +193,13 @@ bool lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd:: if (!num_elements_sp) return false; - m_num_elements = num_elements_sp->GetValueAsUnsigned(0); + m_tree = table_sp->GetChildAtNamePath(next_path).get(); + if (m_tree == nullptr) + return false; + + m_num_elements = num_elements_sp->GetValueAsUnsigned(0); + if (m_num_elements > 0) m_next_element = table_sp->GetChildAtNamePath(next_path).get(); diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp index bad730512ff4..ce701fd823fd 100644 --- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp +++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp @@ -116,9 +116,10 @@ bool ObjectFileBreakpad::ParseHeader() { return true; } -Symtab *ObjectFileBreakpad::GetSymtab() { - // TODO - return nullptr; +void ObjectFileBreakpad::ParseSymtab(Symtab &symtab) { + // Nothing to do for breakpad files, all information is parsed as debug info + // which means "lldb_private::Function" objects are used, or symbols are added + // by the SymbolFileBreakpad::AddSymbols(...) function in the symbol file. 
} void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) { diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h index c320c7ad3e2e..f04e0b4dd7a7 100644 --- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h +++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h @@ -71,7 +71,7 @@ public: return AddressClass::eInvalid; } - Symtab *GetSymtab() override; + void ParseSymtab(lldb_private::Symtab &symtab) override; bool IsStripped() override { return false; } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 8e0f228a988f..96e94ef08a45 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -2687,155 +2687,131 @@ unsigned ObjectFileELF::RelocateDebugSections(const ELFSectionHeader *rel_hdr, return 0; } -Symtab *ObjectFileELF::GetSymtab() { +void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { ModuleSP module_sp(GetModule()); if (!module_sp) - return nullptr; + return; + + Progress progress( + llvm::formatv("Parsing symbol table for {0}", + m_file.GetFilename().AsCString("<Unknown>"))); + ElapsedTime elapsed(module_sp->GetSymtabParseTime()); // We always want to use the main object file so we (hopefully) only have one // cached copy of our symtab, dynamic sections, etc. ObjectFile *module_obj_file = module_sp->GetObjectFile(); if (module_obj_file && module_obj_file != this) - return module_obj_file->GetSymtab(); - - if (m_symtab_up == nullptr) { - Progress progress( - llvm::formatv("Parsing symbol table for {0}", - m_file.GetFilename().AsCString("<Unknown>"))); - ElapsedTime elapsed(module_sp->GetSymtabParseTime()); - SectionList *section_list = module_sp->GetSectionList(); - if (!section_list) - return nullptr; + return module_obj_file->ParseSymtab(lldb_symtab); - uint64_t symbol_id = 0; - std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); - - // Sharable objects and dynamic executables usually have 2 distinct symbol - // tables, one named ".symtab", and the other ".dynsym". The dynsym is a - // smaller version of the symtab that only contains global symbols. The - // information found in the dynsym is therefore also found in the symtab, - // while the reverse is not necessarily true. - Section *symtab = - section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); - if (symtab) { - m_symtab_up = std::make_unique<Symtab>(symtab->GetObjectFile()); - symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab); - } - - // The symtab section is non-allocable and can be stripped, while the - // .dynsym section which should always be always be there. To support the - // minidebuginfo case we parse .dynsym when there's a .gnu_debuginfo - // section, nomatter if .symtab was already parsed or not. This is because - // minidebuginfo normally removes the .symtab symbols which have their - // matching .dynsym counterparts. 
- if (!symtab || - GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { - Section *dynsym = - section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) - .get(); - if (dynsym) { - if (!m_symtab_up) - m_symtab_up = std::make_unique<Symtab>(dynsym->GetObjectFile()); - symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym); - } - } + SectionList *section_list = module_sp->GetSectionList(); + if (!section_list) + return; - // DT_JMPREL - // If present, this entry's d_ptr member holds the address of - // relocation - // entries associated solely with the procedure linkage table. - // Separating - // these relocation entries lets the dynamic linker ignore them during - // process initialization, if lazy binding is enabled. If this entry is - // present, the related entries of types DT_PLTRELSZ and DT_PLTREL must - // also be present. - const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL); - if (symbol) { - // Synthesize trampoline symbols to help navigate the PLT. - addr_t addr = symbol->d_ptr; - Section *reloc_section = - section_list->FindSectionContainingFileAddress(addr).get(); - if (reloc_section) { - user_id_t reloc_id = reloc_section->GetID(); - const ELFSectionHeaderInfo *reloc_header = - GetSectionHeaderByIndex(reloc_id); - if (reloc_header) { - if (m_symtab_up == nullptr) - m_symtab_up = - std::make_unique<Symtab>(reloc_section->GetObjectFile()); - - ParseTrampolineSymbols(m_symtab_up.get(), symbol_id, reloc_header, - reloc_id); - } - } - } + uint64_t symbol_id = 0; - if (DWARFCallFrameInfo *eh_frame = - GetModule()->GetUnwindTable().GetEHFrameInfo()) { - if (m_symtab_up == nullptr) - m_symtab_up = std::make_unique<Symtab>(this); - ParseUnwindSymbols(m_symtab_up.get(), eh_frame); + // Sharable objects and dynamic executables usually have 2 distinct symbol + // tables, one named ".symtab", and the other ".dynsym". The dynsym is a + // smaller version of the symtab that only contains global symbols. The + // information found in the dynsym is therefore also found in the symtab, + // while the reverse is not necessarily true. + Section *symtab = + section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); + if (symtab) + symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, symtab); + + // The symtab section is non-allocable and can be stripped, while the + // .dynsym section which should always be always be there. To support the + // minidebuginfo case we parse .dynsym when there's a .gnu_debuginfo + // section, nomatter if .symtab was already parsed or not. This is because + // minidebuginfo normally removes the .symtab symbols which have their + // matching .dynsym counterparts. + if (!symtab || + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) + .get(); + if (dynsym) + symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, dynsym); + } + + // DT_JMPREL + // If present, this entry's d_ptr member holds the address of + // relocation + // entries associated solely with the procedure linkage table. + // Separating + // these relocation entries lets the dynamic linker ignore them during + // process initialization, if lazy binding is enabled. If this entry is + // present, the related entries of types DT_PLTRELSZ and DT_PLTREL must + // also be present. + const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL); + if (symbol) { + // Synthesize trampoline symbols to help navigate the PLT. 
+ addr_t addr = symbol->d_ptr; + Section *reloc_section = + section_list->FindSectionContainingFileAddress(addr).get(); + if (reloc_section) { + user_id_t reloc_id = reloc_section->GetID(); + const ELFSectionHeaderInfo *reloc_header = + GetSectionHeaderByIndex(reloc_id); + if (reloc_header) + ParseTrampolineSymbols(&lldb_symtab, symbol_id, reloc_header, reloc_id); } + } - // If we still don't have any symtab then create an empty instance to avoid - // do the section lookup next time. - if (m_symtab_up == nullptr) - m_symtab_up = std::make_unique<Symtab>(this); - - // In the event that there's no symbol entry for the entry point we'll - // artificially create one. We delegate to the symtab object the figuring - // out of the proper size, this will usually make it span til the next - // symbol it finds in the section. This means that if there are missing - // symbols the entry point might span beyond its function definition. - // We're fine with this as it doesn't make it worse than not having a - // symbol entry at all. - if (CalculateType() == eTypeExecutable) { - ArchSpec arch = GetArchitecture(); - auto entry_point_addr = GetEntryPointAddress(); - bool is_valid_entry_point = - entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset(); - addr_t entry_point_file_addr = entry_point_addr.GetFileAddress(); - if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress( - entry_point_file_addr)) { - uint64_t symbol_id = m_symtab_up->GetNumSymbols(); - // Don't set the name for any synthetic symbols, the Symbol - // object will generate one if needed when the name is accessed - // via accessors. - SectionSP section_sp = entry_point_addr.GetSection(); - Symbol symbol( - /*symID=*/symbol_id, - /*name=*/llvm::StringRef(), // Name will be auto generated. - /*type=*/eSymbolTypeCode, - /*external=*/true, - /*is_debug=*/false, - /*is_trampoline=*/false, - /*is_artificial=*/true, - /*section_sp=*/section_sp, - /*offset=*/0, - /*size=*/0, // FDE can span multiple symbols so don't use its size. - /*size_is_valid=*/false, - /*contains_linker_annotations=*/false, - /*flags=*/0); - // When the entry point is arm thumb we need to explicitly set its - // class address to reflect that. This is important because expression - // evaluation relies on correctly setting a breakpoint at this - // address. - if (arch.GetMachine() == llvm::Triple::arm && - (entry_point_file_addr & 1)) { - symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); - m_address_class_map[entry_point_file_addr ^ 1] = - AddressClass::eCodeAlternateISA; - } else { - m_address_class_map[entry_point_file_addr] = AddressClass::eCode; - } - m_symtab_up->AddSymbol(symbol); + if (DWARFCallFrameInfo *eh_frame = + GetModule()->GetUnwindTable().GetEHFrameInfo()) { + ParseUnwindSymbols(&lldb_symtab, eh_frame); + } + + // In the event that there's no symbol entry for the entry point we'll + // artificially create one. We delegate to the symtab object the figuring + // out of the proper size, this will usually make it span til the next + // symbol it finds in the section. This means that if there are missing + // symbols the entry point might span beyond its function definition. + // We're fine with this as it doesn't make it worse than not having a + // symbol entry at all. 
+ if (CalculateType() == eTypeExecutable) { + ArchSpec arch = GetArchitecture(); + auto entry_point_addr = GetEntryPointAddress(); + bool is_valid_entry_point = + entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset(); + addr_t entry_point_file_addr = entry_point_addr.GetFileAddress(); + if (is_valid_entry_point && !lldb_symtab.FindSymbolContainingFileAddress( + entry_point_file_addr)) { + uint64_t symbol_id = lldb_symtab.GetNumSymbols(); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + SectionSP section_sp = entry_point_addr.GetSection(); + Symbol symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/0, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); + // When the entry point is arm thumb we need to explicitly set its + // class address to reflect that. This is important because expression + // evaluation relies on correctly setting a breakpoint at this + // address. + if (arch.GetMachine() == llvm::Triple::arm && + (entry_point_file_addr & 1)) { + symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); + m_address_class_map[entry_point_file_addr ^ 1] = + AddressClass::eCodeAlternateISA; + } else { + m_address_class_map[entry_point_file_addr] = AddressClass::eCode; } + lldb_symtab.AddSymbol(symbol); } - - m_symtab_up->CalculateSymbolSizes(); } - - return m_symtab_up.get(); } void ObjectFileELF::RelocateSection(lldb_private::Section *section) diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h index 5738e5cf60d5..554f623ec8af 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -110,7 +110,7 @@ public: lldb_private::AddressClass GetAddressClass(lldb::addr_t file_addr) override; - lldb_private::Symtab *GetSymtab() override; + void ParseSymtab(lldb_private::Symtab &symtab) override; bool IsStripped() override; @@ -123,7 +123,7 @@ public: lldb_private::UUID GetUUID() override; /// Return the contents of the .gnu_debuglink section, if the object file - /// contains it. + /// contains it. llvm::Optional<lldb_private::FileSpec> GetDebugLink(); uint32_t GetDependentModules(lldb_private::FileSpecList &files) override; @@ -278,8 +278,9 @@ private: /// number of dynamic symbols parsed. size_t ParseDynamicSymbols(); - /// Populates m_symtab_up will all non-dynamic linker symbols. This method - /// will parse the symbols only once. Returns the number of symbols parsed. + /// Populates the symbol table with all non-dynamic linker symbols. This + /// method will parse the symbols only once. Returns the number of symbols + /// parsed. unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id, lldb_private::Section *symtab); @@ -384,7 +385,7 @@ private: lldb_private::UUID &uuid); bool AnySegmentHasPhysicalAddress(); - + /// Takes the .gnu_debugdata and returns the decompressed object file that is /// stored within that section. 
/// diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp index bec0099517c8..ca9337454889 100644 --- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp +++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp @@ -106,23 +106,10 @@ uint32_t ObjectFileJIT::GetAddressByteSize() const { return m_data.GetAddressByteSize(); } -Symtab *ObjectFileJIT::GetSymtab() { - ModuleSP module_sp(GetModule()); - if (module_sp) { - std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); - if (m_symtab_up == nullptr) { - ElapsedTime elapsed(module_sp->GetSymtabParseTime()); - m_symtab_up = std::make_unique<Symtab>(this); - std::lock_guard<std::recursive_mutex> symtab_guard( - m_symtab_up->GetMutex()); - ObjectFileJITDelegateSP delegate_sp(m_delegate_wp.lock()); - if (delegate_sp) - delegate_sp->PopulateSymtab(this, *m_symtab_up); - // TODO: get symbols from delegate - m_symtab_up->Finalize(); - } - } - return m_symtab_up.get(); +void ObjectFileJIT::ParseSymtab(Symtab &symtab) { + ObjectFileJITDelegateSP delegate_sp(m_delegate_wp.lock()); + if (delegate_sp) + delegate_sp->PopulateSymtab(this, symtab); } bool ObjectFileJIT::IsStripped() { diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h index 03ac001988a0..be31139df549 100644 --- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h +++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h @@ -67,7 +67,7 @@ public: uint32_t GetAddressByteSize() const override; - lldb_private::Symtab *GetSymtab() override; + void ParseSymtab(lldb_private::Symtab &symtab) override; bool IsStripped() override; diff --git a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h index 36e71e21332f..da999d2b55a7 100644 --- a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h +++ b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h @@ -68,7 +68,7 @@ public: bool IsExecutable() const override { return false; } - Symtab *GetSymtab() override { return nullptr; } + void ParseSymtab(lldb_private::Symtab &symtab) override {} bool IsStripped() override { return false; } diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index 0e6329885528..7445f8311c50 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -246,7 +246,7 @@ bool ObjectFileWasm::ParseHeader() { return true; } -Symtab *ObjectFileWasm::GetSymtab() { return nullptr; } +void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} static SectionType GetSectionTypeFromName(llvm::StringRef Name) { if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h index 44939b6d4ea0..d7b5bc22caad 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h @@ -78,7 +78,7 @@ public: return AddressClass::eInvalid; } - Symtab *GetSymtab() override; + void ParseSymtab(lldb_private::Symtab &symtab) override; bool IsStripped() override { return !!GetExternalDebugInfoFileSpec(); } diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp new file mode 100644 index 000000000000..90c290b6fbc7 --- /dev/null +++ 
b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp @@ -0,0 +1,148 @@ +//===-- PlatformQemuUser.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/Platform/QemuUser/PlatformQemuUser.h" +#include "Plugins/Process/gdb-remote/ProcessGDBRemote.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/Host/ProcessLaunchInfo.h" +#include "lldb/Interpreter/OptionValueProperties.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/Listener.h" +#include "lldb/Utility/Log.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(PlatformQemuUser) + +#define LLDB_PROPERTIES_platformqemuuser +#include "PlatformQemuUserProperties.inc" + +enum { +#define LLDB_PROPERTIES_platformqemuuser +#include "PlatformQemuUserPropertiesEnum.inc" +}; + +class PluginProperties : public Properties { +public: + PluginProperties() { + m_collection_sp = std::make_shared<OptionValueProperties>( + ConstString(PlatformQemuUser::GetPluginNameStatic())); + m_collection_sp->Initialize(g_platformqemuuser_properties); + } + + llvm::StringRef GetArchitecture() { + return m_collection_sp->GetPropertyAtIndexAsString( + nullptr, ePropertyArchitecture, ""); + } + + FileSpec GetEmulatorPath() { + return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr, + ePropertyEmulatorPath); + } +}; + +static PluginProperties &GetGlobalProperties() { + static PluginProperties g_settings; + return g_settings; +} + +llvm::StringRef PlatformQemuUser::GetPluginDescriptionStatic() { + return "Platform for debugging binaries under user mode qemu"; +} + +void PlatformQemuUser::Initialize() { + PluginManager::RegisterPlugin( + GetPluginNameStatic(), GetPluginDescriptionStatic(), + PlatformQemuUser::CreateInstance, PlatformQemuUser::DebuggerInitialize); +} + +void PlatformQemuUser::Terminate() { + PluginManager::UnregisterPlugin(PlatformQemuUser::CreateInstance); +} + +void PlatformQemuUser::DebuggerInitialize(Debugger &debugger) { + if (!PluginManager::GetSettingForPlatformPlugin( + debugger, ConstString(GetPluginNameStatic()))) { + PluginManager::CreateSettingForPlatformPlugin( + debugger, GetGlobalProperties().GetValueProperties(), + ConstString("Properties for the qemu-user platform plugin."), + /*is_global_property=*/true); + } +} + +PlatformSP PlatformQemuUser::CreateInstance(bool force, const ArchSpec *arch) { + if (force) + return PlatformSP(new PlatformQemuUser()); + return nullptr; +} + +std::vector<ArchSpec> PlatformQemuUser::GetSupportedArchitectures() { + llvm::Triple triple = HostInfo::GetArchitecture().GetTriple(); + triple.setEnvironment(llvm::Triple::UnknownEnvironment); + triple.setArchName(GetGlobalProperties().GetArchitecture()); + if (triple.getArch() != llvm::Triple::UnknownArch) + return {ArchSpec(triple)}; + return {}; +} + +static auto get_arg_range(const Args &args) { + return llvm::make_range(args.GetArgumentArrayRef().begin(), + args.GetArgumentArrayRef().end()); +} + +lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info, + Debugger &debugger, + Target &target, Status &error) { + Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM); + + std::string qemu = 
GetGlobalProperties().GetEmulatorPath().GetPath(); + + llvm::SmallString<0> socket_model, socket_path; + HostInfo::GetProcessTempDir().GetPath(socket_model); + llvm::sys::path::append(socket_model, "qemu-%%%%%%%%.socket"); + do { + llvm::sys::fs::createUniquePath(socket_model, socket_path, false); + } while (FileSystem::Instance().Exists(socket_path)); + + Args args( + {qemu, "-g", socket_path, launch_info.GetExecutableFile().GetPath()}); + for (size_t i = 1; i < launch_info.GetArguments().size(); ++i) + args.AppendArgument(launch_info.GetArguments()[i].ref()); + + LLDB_LOG(log, "{0} -> {1}", get_arg_range(launch_info.GetArguments()), + get_arg_range(args)); + + launch_info.SetArguments(args, true); + launch_info.SetLaunchInSeparateProcessGroup(true); + launch_info.GetFlags().Clear(eLaunchFlagDebug); + launch_info.SetMonitorProcessCallback(ProcessLaunchInfo::NoOpMonitorCallback, + false); + + error = Host::LaunchProcess(launch_info); + if (error.Fail()) + return nullptr; + + ProcessSP process_sp = target.CreateProcess( + launch_info.GetListener(), + process_gdb_remote::ProcessGDBRemote::GetPluginNameStatic(), nullptr, + true); + ListenerSP listener_sp = + Listener::MakeListener("lldb.platform_qemu_user.debugprocess"); + launch_info.SetHijackListener(listener_sp); + Process::ProcessEventHijacker hijacker(*process_sp, listener_sp); + + error = process_sp->ConnectRemote(("unix-connect://" + socket_path).str()); + if (error.Fail()) + return nullptr; + + process_sp->WaitForProcessToStop(llvm::None, nullptr, false, listener_sp); + return process_sp; +} diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h new file mode 100644 index 000000000000..f4f5d224a8cd --- /dev/null +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h @@ -0,0 +1,57 @@ +//===-- PlatformQemuUser.h ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Host/Host.h" +#include "lldb/Host/HostInfo.h" +#include "lldb/Target/Platform.h" + +namespace lldb_private { + +class PlatformQemuUser : public Platform { +public: + static void Initialize(); + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "qemu-user"; } + static llvm::StringRef GetPluginDescriptionStatic(); + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + llvm::StringRef GetDescription() override { + return GetPluginDescriptionStatic(); + } + + UserIDResolver &GetUserIDResolver() override { + return HostInfo::GetUserIDResolver(); + } + + std::vector<ArchSpec> GetSupportedArchitectures() override; + + lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info, + Debugger &debugger, Target &target, + Status &error) override; + + lldb::ProcessSP Attach(ProcessAttachInfo &attach_info, Debugger &debugger, + Target *target, Status &status) override { + status.SetErrorString("Not supported"); + return nullptr; + } + + bool IsConnected() const override { return true; } + + void CalculateTrapHandlerSymbolNames() override {} + + Environment GetEnvironment() override { return Host::GetEnvironment(); } + +private: + static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch); + static void DebuggerInitialize(Debugger &debugger); + + PlatformQemuUser() : Platform(/*is_host=*/false) {} +}; + +} // namespace lldb_private diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td new file mode 100644 index 000000000000..abfab7f59de4 --- /dev/null +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td @@ -0,0 +1,12 @@ +include "../../../../include/lldb/Core/PropertiesBase.td" + +let Definition = "platformqemuuser" in { + def Architecture: Property<"architecture", "String">, + Global, + DefaultStringValue<"">, + Desc<"Architecture to emulate.">; + def EmulatorPath: Property<"emulator-path", "FileSpec">, + Global, + DefaultStringValue<"">, + Desc<"Path to the emulator binary.">; +} diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 23b346d5c17f..b852a0164375 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -281,8 +281,8 @@ size_t ProcessElfCore::ReadMemory(lldb::addr_t addr, void *buf, size_t size, return DoReadMemory(addr, buf, size, error); } -Status ProcessElfCore::DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region_info) { +Status ProcessElfCore::GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region_info) { region_info.Clear(); const VMRangeToPermissions::Entry *permission_entry = m_core_range_infos.FindEntryThatContainsOrFollows(load_addr); diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index fd36e5027816..67df3c5fac76 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -86,6 +86,10 @@ public: size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, lldb_private::Status &error) override; + lldb_private::Status + GetMemoryRegionInfo(lldb::addr_t load_addr, + lldb_private::MemoryRegionInfo &region_info) 
override; + lldb::addr_t GetImageInfoAddress() override; lldb_private::ArchSpec GetArchitecture(); @@ -101,10 +105,6 @@ protected: bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list, lldb_private::ThreadList &new_thread_list) override; - lldb_private::Status - DoGetMemoryRegionInfo(lldb::addr_t load_addr, - lldb_private::MemoryRegionInfo &region_info) override; - private: struct NT_FILE_Entry { lldb::addr_t start; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp index 4ce79da48f07..25ae08838bf8 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp @@ -81,11 +81,6 @@ GDBRemoteCommunication::~GDBRemoteCommunication() { if (m_decompression_scratch) free (m_decompression_scratch); #endif - - // Stop the communications read thread which is used to parse all incoming - // packets. This function will block until the read thread returns. - if (m_read_thread_enabled) - StopReadThread(); } char GDBRemoteCommunication::CalculcateChecksum(llvm::StringRef payload) { @@ -193,7 +188,7 @@ GDBRemoteCommunication::SendRawPacketNoLock(llvm::StringRef packet, GDBRemoteCommunication::PacketResult GDBRemoteCommunication::GetAck() { StringExtractorGDBRemote packet; - PacketResult result = ReadPacket(packet, GetPacketTimeout(), false); + PacketResult result = WaitForPacketNoLock(packet, GetPacketTimeout(), false); if (result == PacketResult::Success) { if (packet.GetResponseType() == StringExtractorGDBRemote::ResponseType::eAck) @@ -225,40 +220,18 @@ GDBRemoteCommunication::PacketResult GDBRemoteCommunication::ReadPacket(StringExtractorGDBRemote &response, Timeout<std::micro> timeout, bool sync_on_timeout) { - if (m_read_thread_enabled) - return PopPacketFromQueue(response, timeout); - else - return WaitForPacketNoLock(response, timeout, sync_on_timeout); -} + using ResponseType = StringExtractorGDBRemote::ResponseType; -// This function is called when a packet is requested. -// A whole packet is popped from the packet queue and returned to the caller. -// Packets are placed into this queue from the communication read thread. See -// GDBRemoteCommunication::AppendBytesToCache. 
-GDBRemoteCommunication::PacketResult -GDBRemoteCommunication::PopPacketFromQueue(StringExtractorGDBRemote &response, - Timeout<std::micro> timeout) { - auto pred = [&] { return !m_packet_queue.empty() && IsConnected(); }; - // lock down the packet queue - std::unique_lock<std::mutex> lock(m_packet_queue_mutex); - - if (!timeout) - m_condition_queue_not_empty.wait(lock, pred); - else { - if (!m_condition_queue_not_empty.wait_for(lock, *timeout, pred)) - return PacketResult::ErrorReplyTimeout; - if (!IsConnected()) - return PacketResult::ErrorDisconnected; + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS)); + for (;;) { + PacketResult result = + WaitForPacketNoLock(response, timeout, sync_on_timeout); + if (result != PacketResult::Success || + (response.GetResponseType() != ResponseType::eAck && + response.GetResponseType() != ResponseType::eNack)) + return result; + LLDB_LOG(log, "discarding spurious `{0}` packet", response.GetStringRef()); } - - // get the front element of the queue - response = m_packet_queue.front(); - - // remove the front element - m_packet_queue.pop(); - - // we got a packet - return PacketResult::Success; } GDBRemoteCommunication::PacketResult @@ -1287,53 +1260,6 @@ GDBRemoteCommunication::ScopedTimeout::~ScopedTimeout() { m_gdb_comm.SetPacketTimeout(m_saved_timeout); } -// This function is called via the Communications class read thread when bytes -// become available for this connection. This function will consume all -// incoming bytes and try to parse whole packets as they become available. Full -// packets are placed in a queue, so that all packet requests can simply pop -// from this queue. Async notification packets will be dispatched immediately -// to the ProcessGDBRemote Async thread via an event. 
-void GDBRemoteCommunication::AppendBytesToCache(const uint8_t *bytes, - size_t len, bool broadcast, - lldb::ConnectionStatus status) { - StringExtractorGDBRemote packet; - - while (true) { - PacketType type = CheckForPacket(bytes, len, packet); - - // scrub the data so we do not pass it back to CheckForPacket on future - // passes of the loop - bytes = nullptr; - len = 0; - - // we may have received no packet so lets bail out - if (type == PacketType::Invalid) - break; - - if (type == PacketType::Standard) { - // scope for the mutex - { - // lock down the packet queue - std::lock_guard<std::mutex> guard(m_packet_queue_mutex); - // push a new packet into the queue - m_packet_queue.push(packet); - // Signal condition variable that we have a packet - m_condition_queue_not_empty.notify_one(); - } - } - - if (type == PacketType::Notify) { - // put this packet into an event - const char *pdata = packet.GetStringRef().data(); - - // as the communication class, we are a broadcaster and the async thread - // is tuned to listen to us - BroadcastEvent(eBroadcastBitGdbReadThreadGotNotify, - new EventDataBytes(pdata)); - } - } -} - void llvm::format_provider<GDBRemoteCommunication::PacketResult>::format( const GDBRemoteCommunication::PacketResult &result, raw_ostream &Stream, StringRef Style) { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h index 5da568e9b4d4..afc7e740d4c9 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h @@ -84,8 +84,6 @@ class GDBRemoteCommunication : public Communication { public: enum { eBroadcastBitRunPacketSent = kLoUserBroadcastBit, - eBroadcastBitGdbReadThreadGotNotify = - kLoUserBroadcastBit << 1 // Sent when we received a notify packet. }; enum class PacketType { Invalid = 0, Standard, Notify }; @@ -196,10 +194,6 @@ protected: bool sync_on_timeout, llvm::function_ref<void(llvm::StringRef)> output_callback); - // Pop a packet from the queue in a thread safe manner - PacketResult PopPacketFromQueue(StringExtractorGDBRemote &response, - Timeout<std::micro> timeout); - PacketResult WaitForPacketNoLock(StringExtractorGDBRemote &response, Timeout<std::micro> timeout, bool sync_on_timeout); @@ -226,24 +220,7 @@ protected: static lldb::thread_result_t ListenThread(lldb::thread_arg_t arg); - // GDB-Remote read thread - // . this thread constantly tries to read from the communication - // class and stores all packets received in a queue. The usual - // threads read requests simply pop packets off the queue in the - // usual order. - // This setup allows us to intercept and handle async packets, such - // as the notify packet. 
- - // This method is defined as part of communication.h - // when the read thread gets any bytes it will pass them on to this function - void AppendBytesToCache(const uint8_t *bytes, size_t len, bool broadcast, - lldb::ConnectionStatus status) override; - private: - std::queue<StringExtractorGDBRemote> m_packet_queue; // The packet queue - std::mutex m_packet_queue_mutex; // Mutex for accessing queue - std::condition_variable - m_condition_queue_not_empty; // Condition variable to wait for packets // Promise used to grab the port number from listening thread std::promise<uint16_t> m_port_promise; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 78e722eee080..07dfa5e04ee5 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -86,13 +86,6 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) { std::chrono::steady_clock::time_point start_of_handshake = std::chrono::steady_clock::now(); if (SendAck()) { - // Wait for any responses that might have been queued up in the remote - // GDB server and flush them all - StringExtractorGDBRemote response; - PacketResult packet_result = PacketResult::Success; - while (packet_result == PacketResult::Success) - packet_result = ReadPacket(response, milliseconds(10), false); - // The return value from QueryNoAckModeSupported() is true if the packet // was sent and _any_ response (including UNIMPLEMENTED) was received), or // false if no response was received. This quickly tells us if we have a @@ -106,17 +99,15 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) { std::chrono::duration<double>(end_of_handshake - start_of_handshake) .count(); if (error_ptr) { - if (packet_result == PacketResult::ErrorDisconnected) + if (!IsConnected()) error_ptr->SetErrorString("Connection shut down by remote side " "while waiting for reply to initial " "handshake packet"); - else if (packet_result == PacketResult::ErrorReplyTimeout) + else error_ptr->SetErrorStringWithFormat( "failed to get reply to handshake packet within timeout of " "%.1f seconds", handshake_timeout); - else - error_ptr->SetErrorString("failed to get reply to handshake packet"); } } } else { diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp index 11cac9fa3a4d..49d88b72b01b 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp @@ -46,7 +46,7 @@ GDBRemoteCommunicationServer::GetPacketAndSendResponse( Timeout<std::micro> timeout, Status &error, bool &interrupt, bool &quit) { StringExtractorGDBRemote packet; - PacketResult packet_result = WaitForPacketNoLock(packet, timeout, false); + PacketResult packet_result = ReadPacket(packet, timeout, false); if (packet_result == PacketResult::Success) { const StringExtractorGDBRemote::ServerPacketType packet_type = packet.GetServerPacketType(); @@ -150,10 +150,6 @@ GDBRemoteCommunicationServer::SendOKResponse() { return SendPacketNoLock("OK"); } -bool GDBRemoteCommunicationServer::HandshakeWithClient() { - return GetAck() == PacketResult::Success; -} - GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServer::SendJSONResponse(const json::Value &value) { std::string 
json_string; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h index 68448eae2b9f..5de344061ec9 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h @@ -44,10 +44,6 @@ public: Status &error, bool &interrupt, bool &quit); - // After connecting, do a little handshake with the client to make sure - // we are at least communicating - bool HandshakeWithClient(); - protected: std::map<StringExtractorGDBRemote::ServerPacketType, PacketHandler> m_packet_handlers; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index 5360db3d8462..30f14a52dfb5 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -1088,18 +1088,6 @@ void GDBRemoteCommunicationServerLLGS::NewSubprocess( void GDBRemoteCommunicationServerLLGS::DataAvailableCallback() { Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_COMM)); - if (!m_handshake_completed) { - if (!HandshakeWithClient()) { - LLDB_LOGF(log, - "GDBRemoteCommunicationServerLLGS::%s handshake with " - "client failed, exiting", - __FUNCTION__); - m_mainloop.RequestTermination(); - return; - } - m_handshake_completed = true; - } - bool interrupt = false; bool done = false; Status error; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h index 6c75771f6427..17ee4130dc34 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h @@ -104,7 +104,6 @@ protected: std::mutex m_saved_registers_mutex; std::unordered_map<uint32_t, lldb::DataBufferSP> m_saved_registers_map; uint32_t m_next_saved_registers_id = 1; - bool m_handshake_completed = false; bool m_thread_suffix_supported = false; bool m_list_threads_in_stop_reply = false; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 2233bf675819..3ade8c815feb 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -282,9 +282,7 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp, __FUNCTION__); } - const uint32_t gdb_event_mask = - Communication::eBroadcastBitReadThreadDidExit | - GDBRemoteCommunication::eBroadcastBitGdbReadThreadGotNotify; + const uint32_t gdb_event_mask = Communication::eBroadcastBitReadThreadDidExit; if (m_async_listener_sp->StartListeningForEvents( &m_gdb_comm, gdb_event_mask) != gdb_event_mask) { LLDB_LOGF(log, @@ -1324,24 +1322,6 @@ Status ProcessGDBRemote::DoResume() { return error; } -void ProcessGDBRemote::HandleStopReplySequence() { - while (true) { - // Send vStopped - StringExtractorGDBRemote response; - m_gdb_comm.SendPacketAndWaitForResponse("vStopped", response); - - // OK represents end of signal list - if (response.IsOKResponse()) - break; - - // If not OK or a normal packet we have a problem - if (!response.IsNormalResponse()) - break; - - SetLastStopPacket(response); - } -} - void ProcessGDBRemote::ClearThreadIDList() { std::lock_guard<std::recursive_mutex> 
guard(m_thread_list_real.GetMutex()); m_thread_ids.clear(); @@ -2897,8 +2877,8 @@ lldb::addr_t ProcessGDBRemote::DoAllocateMemory(size_t size, return allocated_addr; } -Status ProcessGDBRemote::DoGetMemoryRegionInfo(addr_t load_addr, - MemoryRegionInfo &region_info) { +Status ProcessGDBRemote::GetMemoryRegionInfo(addr_t load_addr, + MemoryRegionInfo &region_info) { Status error(m_gdb_comm.GetMemoryRegionInfo(load_addr, region_info)); return error; @@ -3539,31 +3519,6 @@ void ProcessGDBRemote::StopAsyncThread() { __FUNCTION__); } -bool ProcessGDBRemote::HandleNotifyPacket(StringExtractorGDBRemote &packet) { - // get the packet at a string - const std::string &pkt = std::string(packet.GetStringRef()); - // skip %stop: - StringExtractorGDBRemote stop_info(pkt.c_str() + 5); - - // pass as a thread stop info packet - SetLastStopPacket(stop_info); - - // check for more stop reasons - HandleStopReplySequence(); - - // if the process is stopped then we need to fake a resume so that we can - // stop properly with the new break. This is possible due to - // SetPrivateState() broadcasting the state change as a side effect. - if (GetPrivateState() == lldb::StateType::eStateStopped) { - SetPrivateState(lldb::StateType::eStateRunning); - } - - // since we have some stopped packets we can halt the process - SetPrivateState(lldb::StateType::eStateStopped); - - return true; -} - thread_result_t ProcessGDBRemote::AsyncThread(void *arg) { ProcessGDBRemote *process = (ProcessGDBRemote *)arg; @@ -3712,17 +3667,6 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) { done = true; break; - case GDBRemoteCommunication::eBroadcastBitGdbReadThreadGotNotify: { - lldb_private::Event *event = event_sp.get(); - const EventDataBytes *continue_packet = - EventDataBytes::GetEventDataFromEvent(event); - StringExtractorGDBRemote notify( - (const char *)continue_packet->GetBytes()); - // Hand this over to the process to handle - process->HandleNotifyPacket(notify); - break; - } - default: LLDB_LOGF(log, "ProcessGDBRemote::%s (arg = %p, pid = %" PRIu64 diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 8134bc6b530d..488336b8c1b8 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -144,6 +144,9 @@ public: lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions, Status &error) override; + Status GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region_info) override; + Status DoDeallocateMemory(lldb::addr_t ptr) override; // Process STDIO @@ -343,8 +346,6 @@ protected: size_t UpdateThreadIDsFromStopReplyThreadsValue(llvm::StringRef value); - bool HandleNotifyPacket(StringExtractorGDBRemote &packet); - bool StartAsyncThread(); void StopAsyncThread(); @@ -375,8 +376,6 @@ protected: lldb::addr_t dispatch_queue_t, std::string &queue_name, lldb::QueueKind queue_kind, uint64_t queue_serial); - void HandleStopReplySequence(); - void ClearThreadIDList(); bool UpdateThreadIDList(); @@ -421,9 +420,6 @@ protected: Status DoWriteMemoryTags(lldb::addr_t addr, size_t len, int32_t type, const std::vector<uint8_t> &tags) override; - Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region_info) override; - private: // For ProcessGDBRemote only std::string m_partial_profile_data; diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp index 
736cfa070088..37ee5466c5b9 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -73,7 +73,7 @@ public: bool IsExecutable() const override { return false; } ArchSpec GetArchitecture() override { return m_arch; } UUID GetUUID() override { return m_uuid; } - Symtab *GetSymtab() override { return m_symtab_up.get(); } + void ParseSymtab(lldb_private::Symtab &symtab) override {} bool IsStripped() override { return true; } ByteOrder GetByteOrder() const override { return m_arch.GetByteOrder(); } @@ -439,8 +439,8 @@ void ProcessMinidump::BuildMemoryRegions() { llvm::sort(*m_memory_regions); } -Status ProcessMinidump::DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region) { +Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region) { BuildMemoryRegions(); region = MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr); return Status(); diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h index 5360269199cd..3501d38a0f27 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h @@ -75,6 +75,9 @@ public: ArchSpec GetArchitecture(); + Status GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) override; + Status GetMemoryRegions( lldb_private::MemoryRegionInfos &region_list) override; @@ -95,9 +98,6 @@ protected: bool DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) override; - Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) override; - void ReadModuleList(); lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid, diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 15d3d43d9993..c1b7294a7f58 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -248,8 +248,8 @@ ArchSpec ScriptedProcess::GetArchitecture() { return GetTarget().GetArchitecture(); } -Status ScriptedProcess::DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region) { +Status ScriptedProcess::GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region) { CheckInterpreterAndScriptObject(); Status error; diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h index c8355f35548a..d56658a2e48a 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h @@ -84,6 +84,9 @@ public: ArchSpec GetArchitecture(); + Status GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) override; + Status GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) override; @@ -97,9 +100,6 @@ protected: bool DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) override; - Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) override; - private: friend class ScriptedThread; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h index 798d947a0a7d..c7af13598843 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h @@ 
-15,8 +15,12 @@ #if LLDB_ENABLE_PYTHON +// LLDB Python header must be included first +#include "lldb-python.h" + #include "lldb/lldb-forward.h" #include "lldb/lldb-types.h" +#include "llvm/Support/Error.h" namespace lldb_private { @@ -41,20 +45,148 @@ template <> const char *GetPythonValueFormatString(unsigned long long); template <> const char *GetPythonValueFormatString(float t); template <> const char *GetPythonValueFormatString(double t); -extern "C" void *LLDBSwigPythonCreateScriptedProcess( +void *LLDBSWIGPython_CastPyObjectToSBData(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data); +void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(PyObject *data); + +// These prototypes are the Pythonic implementations of the required callbacks. +// Although these are scripting-language specific, their definition depends on +// the public API. + +void *LLDBSwigPythonCreateScriptedProcess(const char *python_class_name, + const char *session_dictionary_name, + const lldb::TargetSP &target_sp, + StructuredDataImpl *args_impl, + std::string &error_string); + +void *LLDBSwigPythonCreateScriptedThread(const char *python_class_name, + const char *session_dictionary_name, + const lldb::ProcessSP &process_sp, + StructuredDataImpl *args_impl, + std::string &error_string); + +llvm::Expected<bool> LLDBSwigPythonBreakpointCallbackFunction( + const char *python_function_name, const char *session_dictionary_name, + const lldb::StackFrameSP &sb_frame, + const lldb::BreakpointLocationSP &sb_bp_loc, + lldb_private::StructuredDataImpl *args_impl); + +bool LLDBSwigPythonWatchpointCallbackFunction( + const char *python_function_name, const char *session_dictionary_name, + const lldb::StackFrameSP &sb_frame, const lldb::WatchpointSP &sb_wp); + +bool LLDBSwigPythonCallTypeScript(const char *python_function_name, + const void *session_dictionary, + const lldb::ValueObjectSP &valobj_sp, + void **pyfunct_wrapper, + const lldb::TypeSummaryOptionsSP &options_sp, + std::string &retval); + +void * +LLDBSwigPythonCreateSyntheticProvider(const char *python_class_name, + const char *session_dictionary_name, + const lldb::ValueObjectSP &valobj_sp); + +void *LLDBSwigPythonCreateCommandObject(const char *python_class_name, + const char *session_dictionary_name, + const lldb::DebuggerSP debugger_sp); + +void *LLDBSwigPythonCreateScriptedThreadPlan( const char *python_class_name, const char *session_dictionary_name, - const lldb::TargetSP &target_sp, StructuredDataImpl *args_impl, - std::string &error_string); + lldb_private::StructuredDataImpl *args_data, std::string &error_string, + const lldb::ThreadPlanSP &thread_plan_sp); -extern "C" void *LLDBSwigPythonCreateScriptedThread( +bool LLDBSWIGPythonCallThreadPlan(void *implementor, const char *method_name, + lldb_private::Event *event_sp, + bool &got_error); + +void *LLDBSwigPythonCreateScriptedBreakpointResolver( const char *python_class_name, const char *session_dictionary_name, - const lldb::ProcessSP &process_sp, StructuredDataImpl *args_impl, - std::string &error_string); + lldb_private::StructuredDataImpl *args, const lldb::BreakpointSP &bkpt_sp); + +unsigned int +LLDBSwigPythonCallBreakpointResolver(void *implementor, const char *method_name, + lldb_private::SymbolContext *sym_ctx); + +void *LLDBSwigPythonCreateScriptedStopHook( + lldb::TargetSP target_sp, const char *python_class_name, + const char *session_dictionary_name, lldb_private::StructuredDataImpl *args, + lldb_private::Status 
&error); + +bool LLDBSwigPythonStopHookCallHandleStop(void *implementor, + lldb::ExecutionContextRefSP exc_ctx, + lldb::StreamSP stream); + +size_t LLDBSwigPython_CalculateNumChildren(PyObject *implementor, uint32_t max); + +PyObject *LLDBSwigPython_GetChildAtIndex(PyObject *implementor, uint32_t idx); + +int LLDBSwigPython_GetIndexOfChildWithName(PyObject *implementor, + const char *child_name); + +lldb::ValueObjectSP LLDBSWIGPython_GetValueObjectSPFromSBValue(void *data); + +bool LLDBSwigPython_UpdateSynthProviderInstance(PyObject *implementor); + +bool LLDBSwigPython_MightHaveChildrenSynthProviderInstance( + PyObject *implementor); + +PyObject *LLDBSwigPython_GetValueSynthProviderInstance(PyObject *implementor); + +bool LLDBSwigPythonCallCommand(const char *python_function_name, + const char *session_dictionary_name, + lldb::DebuggerSP &debugger, const char *args, + lldb_private::CommandReturnObject &cmd_retobj, + lldb::ExecutionContextRefSP exe_ctx_ref_sp); + +bool LLDBSwigPythonCallCommandObject( + PyObject *implementor, lldb::DebuggerSP &debugger, const char *args, + lldb_private::CommandReturnObject &cmd_retobj, + lldb::ExecutionContextRefSP exe_ctx_ref_sp); + +bool LLDBSwigPythonCallModuleInit(const char *python_module_name, + const char *session_dictionary_name, + lldb::DebuggerSP &debugger); + +void *LLDBSWIGPythonCreateOSPlugin(const char *python_class_name, + const char *session_dictionary_name, + const lldb::ProcessSP &process_sp); + +void *LLDBSWIGPython_CreateFrameRecognizer(const char *python_class_name, + const char *session_dictionary_name); + +PyObject * +LLDBSwigPython_GetRecognizedArguments(PyObject *implementor, + const lldb::StackFrameSP &frame_sp); + +bool LLDBSWIGPythonRunScriptKeywordProcess(const char *python_function_name, + const char *session_dictionary_name, + const lldb::ProcessSP &process, + std::string &output); + +bool LLDBSWIGPythonRunScriptKeywordThread(const char *python_function_name, + const char *session_dictionary_name, + lldb::ThreadSP &thread, + std::string &output); + +bool LLDBSWIGPythonRunScriptKeywordTarget(const char *python_function_name, + const char *session_dictionary_name, + const lldb::TargetSP &target, + std::string &output); + +bool LLDBSWIGPythonRunScriptKeywordFrame(const char *python_function_name, + const char *session_dictionary_name, + lldb::StackFrameSP &frame, + std::string &output); + +bool LLDBSWIGPythonRunScriptKeywordValue(const char *python_function_name, + const char *session_dictionary_name, + const lldb::ValueObjectSP &value, + std::string &output); -extern "C" void *LLDBSWIGPython_CastPyObjectToSBData(void *data); -extern "C" void *LLDBSWIGPython_CastPyObjectToSBError(void *data); -extern "C" void *LLDBSWIGPython_CastPyObjectToSBValue(void *data); -extern "C" void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(void *data); +void *LLDBSWIGPython_GetDynamicSetting(void *module, const char *setting, + const lldb::TargetSP &target_sp); } // namespace lldb_private diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index c1f4c2d3b4d3..5f282d74e364 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -70,153 +70,6 @@ extern "C" void init_lldb(void); #define LLDBSwigPyInit init_lldb #endif -// These prototypes are the Pythonic implementations of the required callbacks. 
-// Although these are scripting-language specific, their definition depends on -// the public API. - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreturn-type-c-linkage" - -// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has -// C-linkage specified, but returns UDT 'llvm::Expected<bool>' which is -// incompatible with C -#if _MSC_VER -#pragma warning (push) -#pragma warning (disable : 4190) -#endif - -extern "C" llvm::Expected<bool> LLDBSwigPythonBreakpointCallbackFunction( - const char *python_function_name, const char *session_dictionary_name, - const lldb::StackFrameSP &sb_frame, - const lldb::BreakpointLocationSP &sb_bp_loc, StructuredDataImpl *args_impl); - -#if _MSC_VER -#pragma warning (pop) -#endif - -#pragma clang diagnostic pop - -extern "C" bool LLDBSwigPythonWatchpointCallbackFunction( - const char *python_function_name, const char *session_dictionary_name, - const lldb::StackFrameSP &sb_frame, const lldb::WatchpointSP &sb_wp); - -extern "C" bool LLDBSwigPythonCallTypeScript( - const char *python_function_name, void *session_dictionary, - const lldb::ValueObjectSP &valobj_sp, void **pyfunct_wrapper, - const lldb::TypeSummaryOptionsSP &options_sp, std::string &retval); - -extern "C" void * -LLDBSwigPythonCreateSyntheticProvider(const char *python_class_name, - const char *session_dictionary_name, - const lldb::ValueObjectSP &valobj_sp); - -extern "C" void * -LLDBSwigPythonCreateCommandObject(const char *python_class_name, - const char *session_dictionary_name, - const lldb::DebuggerSP debugger_sp); - -extern "C" void *LLDBSwigPythonCreateScriptedThreadPlan( - const char *python_class_name, const char *session_dictionary_name, - StructuredDataImpl *args_data, - std::string &error_string, - const lldb::ThreadPlanSP &thread_plan_sp); - -extern "C" bool LLDBSWIGPythonCallThreadPlan(void *implementor, - const char *method_name, - Event *event_sp, bool &got_error); - -extern "C" void *LLDBSwigPythonCreateScriptedBreakpointResolver( - const char *python_class_name, const char *session_dictionary_name, - lldb_private::StructuredDataImpl *args, lldb::BreakpointSP &bkpt_sp); - -extern "C" unsigned int -LLDBSwigPythonCallBreakpointResolver(void *implementor, const char *method_name, - lldb_private::SymbolContext *sym_ctx); - -extern "C" void *LLDBSwigPythonCreateScriptedStopHook( - TargetSP target_sp, const char *python_class_name, - const char *session_dictionary_name, lldb_private::StructuredDataImpl *args, - lldb_private::Status &error); - -extern "C" bool -LLDBSwigPythonStopHookCallHandleStop(void *implementor, - lldb::ExecutionContextRefSP exc_ctx, - lldb::StreamSP stream); - -extern "C" size_t LLDBSwigPython_CalculateNumChildren(void *implementor, - uint32_t max); - -extern "C" void *LLDBSwigPython_GetChildAtIndex(void *implementor, - uint32_t idx); - -extern "C" int LLDBSwigPython_GetIndexOfChildWithName(void *implementor, - const char *child_name); - -extern lldb::ValueObjectSP -LLDBSWIGPython_GetValueObjectSPFromSBValue(void *data); - -extern "C" bool LLDBSwigPython_UpdateSynthProviderInstance(void *implementor); - -extern "C" bool -LLDBSwigPython_MightHaveChildrenSynthProviderInstance(void *implementor); - -extern "C" void * -LLDBSwigPython_GetValueSynthProviderInstance(void *implementor); - -extern "C" bool -LLDBSwigPythonCallCommand(const char *python_function_name, - const char *session_dictionary_name, - lldb::DebuggerSP &debugger, const char *args, - lldb_private::CommandReturnObject &cmd_retobj, - lldb::ExecutionContextRefSP 
exe_ctx_ref_sp); - -extern "C" bool -LLDBSwigPythonCallCommandObject(void *implementor, lldb::DebuggerSP &debugger, - const char *args, - lldb_private::CommandReturnObject &cmd_retobj, - lldb::ExecutionContextRefSP exe_ctx_ref_sp); - -extern "C" bool -LLDBSwigPythonCallModuleInit(const char *python_module_name, - const char *session_dictionary_name, - lldb::DebuggerSP &debugger); - -extern "C" void * -LLDBSWIGPythonCreateOSPlugin(const char *python_class_name, - const char *session_dictionary_name, - const lldb::ProcessSP &process_sp); - -extern "C" void * -LLDBSWIGPython_CreateFrameRecognizer(const char *python_class_name, - const char *session_dictionary_name); - -extern "C" void * -LLDBSwigPython_GetRecognizedArguments(void *implementor, - const lldb::StackFrameSP &frame_sp); - -extern "C" bool LLDBSWIGPythonRunScriptKeywordProcess( - const char *python_function_name, const char *session_dictionary_name, - lldb::ProcessSP &process, std::string &output); - -extern "C" bool LLDBSWIGPythonRunScriptKeywordThread( - const char *python_function_name, const char *session_dictionary_name, - lldb::ThreadSP &thread, std::string &output); - -extern "C" bool LLDBSWIGPythonRunScriptKeywordTarget( - const char *python_function_name, const char *session_dictionary_name, - lldb::TargetSP &target, std::string &output); - -extern "C" bool LLDBSWIGPythonRunScriptKeywordFrame( - const char *python_function_name, const char *session_dictionary_name, - lldb::StackFrameSP &frame, std::string &output); - -extern "C" bool LLDBSWIGPythonRunScriptKeywordValue( - const char *python_function_name, const char *session_dictionary_name, - lldb::ValueObjectSP &value, std::string &output); - -extern "C" void * -LLDBSWIGPython_GetDynamicSetting(void *module, const char *setting, - const lldb::TargetSP &target_sp); static ScriptInterpreterPythonImpl *GetPythonInterpreter(Debugger &debugger) { ScriptInterpreter *script_interpreter = @@ -1591,9 +1444,9 @@ lldb::ValueObjectListSP ScriptInterpreterPythonImpl::GetRecognizedArguments( if (!implementor.IsAllocated()) return ValueObjectListSP(); - PythonObject py_return(PyRefType::Owned, - (PyObject *)LLDBSwigPython_GetRecognizedArguments( - implementor.get(), frame_sp)); + PythonObject py_return( + PyRefType::Owned, + LLDBSwigPython_GetRecognizedArguments(implementor.get(), frame_sp)); // if it fails, print the error but otherwise go on if (PyErr_Occurred()) { @@ -2423,7 +2276,7 @@ size_t ScriptInterpreterPythonImpl::CalculateNumChildren( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return 0; - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return 0; @@ -2446,7 +2299,7 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetChildAtIndex( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return lldb::ValueObjectSP(); - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return lldb::ValueObjectSP(); @@ -2454,7 +2307,7 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetChildAtIndex( { Locker py_lock(this, Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - void *child_ptr = LLDBSwigPython_GetChildAtIndex(implementor, idx); + PyObject *child_ptr = LLDBSwigPython_GetChildAtIndex(implementor, idx); if (child_ptr != nullptr && child_ptr != Py_None) { lldb::SBValue *sb_value_ptr = (lldb::SBValue *)LLDBSWIGPython_CastPyObjectToSBValue(child_ptr); @@ 
-2478,7 +2331,7 @@ int ScriptInterpreterPythonImpl::GetIndexOfChildWithName( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return UINT32_MAX; - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return UINT32_MAX; @@ -2503,7 +2356,7 @@ bool ScriptInterpreterPythonImpl::UpdateSynthProviderInstance( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return ret_val; - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return ret_val; @@ -2526,7 +2379,7 @@ bool ScriptInterpreterPythonImpl::MightHaveChildrenSynthProviderInstance( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return ret_val; - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return ret_val; @@ -2550,14 +2403,15 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetSyntheticValue( StructuredData::Generic *generic = implementor_sp->GetAsGeneric(); if (!generic) return ret_val; - void *implementor = generic->GetValue(); + auto *implementor = static_cast<PyObject *>(generic->GetValue()); if (!implementor) return ret_val; { Locker py_lock(this, Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); - void *child_ptr = LLDBSwigPython_GetValueSynthProviderInstance(implementor); + PyObject *child_ptr = + LLDBSwigPython_GetValueSynthProviderInstance(implementor); if (child_ptr != nullptr && child_ptr != Py_None) { lldb::SBValue *sb_value_ptr = (lldb::SBValue *)LLDBSWIGPython_CastPyObjectToSBValue(child_ptr); @@ -2653,11 +2507,11 @@ bool ScriptInterpreterPythonImpl::RunScriptFormatKeyword( } { - ProcessSP process_sp(process->shared_from_this()); Locker py_lock(this, Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); ret_val = LLDBSWIGPythonRunScriptKeywordProcess( - impl_function, m_dictionary_name.c_str(), process_sp, output); + impl_function, m_dictionary_name.c_str(), process->shared_from_this(), + output); if (!ret_val) error.SetErrorString("python script evaluation failed"); } @@ -2753,11 +2607,10 @@ bool ScriptInterpreterPythonImpl::RunScriptFormatKeyword( } { - ValueObjectSP value_sp(value->GetSP()); Locker py_lock(this, Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN); ret_val = LLDBSWIGPythonRunScriptKeywordValue( - impl_function, m_dictionary_name.c_str(), value_sp, output); + impl_function, m_dictionary_name.c_str(), value->GetSP(), output); if (!ret_val) error.SetErrorString("python script evaluation failed"); } @@ -3076,9 +2929,9 @@ bool ScriptInterpreterPythonImpl::RunScriptBasedCommand( SynchronicityHandler synch_handler(debugger_sp, synchronicity); std::string args_str = args.str(); - ret_val = LLDBSwigPythonCallCommandObject(impl_obj_sp->GetValue(), - debugger_sp, args_str.c_str(), - cmd_retobj, exe_ctx_ref_sp); + ret_val = LLDBSwigPythonCallCommandObject( + static_cast<PyObject *>(impl_obj_sp->GetValue()), debugger_sp, + args_str.c_str(), cmd_retobj, exe_ctx_ref_sp); } if (!ret_val) diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index b07674af3bd9..9d23f1baf931 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -500,7 +500,7 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) 
{ for (Symbol &symbol : symbols) symtab.AddSymbol(std::move(symbol)); - symtab.CalculateSymbolSizes(); + symtab.Finalize(); } llvm::Expected<lldb::addr_t> @@ -927,4 +927,3 @@ uint64_t SymbolFileBreakpad::GetDebugInfoSize() { // Breakpad files are all debug info. return m_objfile_sp->GetByteSize(); } - diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 2dd7ae60b231..8c20244a6c44 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2067,6 +2067,13 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext( } void SymbolFileDWARF::PreloadSymbols() { + // Get the symbol table for the symbol file prior to taking the module lock + // so that it is available without needing to take the module lock. The DWARF + // indexing might end up needing to relocate items when DWARF sections are + // loaded as they might end up getting the section contents which can call + // ObjectFileELF::RelocateSection() which in turn will ask for the symbol + // table and can cause deadlocks. + GetSymtab(); std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); m_index->Preload(); } @@ -3271,15 +3278,14 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, } const DWARFDIE parent_context_die = GetDeclContextDIEContainingDIE(die); - const dw_tag_t parent_tag = die.GetParent().Tag(); + const DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die); + const dw_tag_t parent_tag = sc_parent_die.Tag(); bool is_static_member = (parent_tag == DW_TAG_compile_unit || parent_tag == DW_TAG_partial_unit) && (parent_context_die.Tag() == DW_TAG_class_type || parent_context_die.Tag() == DW_TAG_structure_type); ValueType scope = eValueTypeInvalid; - - const DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die); SymbolContextScope *symbol_context_scope = nullptr; bool has_explicit_mangled = mangled != nullptr; diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index 8af90cb66e87..bf101ac1acf1 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -1182,8 +1182,9 @@ void SymbolFileNativePDB::FindFunctions( FunctionNameType name_type_mask, bool include_inlines, SymbolContextList &sc_list) { std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); - // For now we only support lookup by method name. - if (!(name_type_mask & eFunctionNameTypeMethod)) + // For now we only support lookup by method name or full name. 
+ if (!(name_type_mask & eFunctionNameTypeFull || + name_type_mask & eFunctionNameTypeMethod)) return; using SymbolAndOffset = std::pair<uint32_t, llvm::codeview::CVSymbol>; diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index 45dfc4b9a152..db0ae241be7e 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -1421,7 +1421,6 @@ void SymbolFilePDB::AddSymbols(lldb_private::Symtab &symtab) { )); } - symtab.CalculateSymbolSizes(); symtab.Finalize(); } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index b20ae32a08ac..b1dbc382ff04 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -91,7 +91,7 @@ static void VerifyDecl(clang::Decl *decl) { assert(decl && "VerifyDecl called with nullptr?"); #ifndef NDEBUG // We don't care about the actual access value here but only want to trigger - // that Clang calls its internal Decl::AccessDeclContextSanity check. + // that Clang calls its internal Decl::AccessDeclContextCheck validation. decl->getAccess(); #endif } diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 101af01341a2..bfab741b0d66 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -244,7 +244,7 @@ ObjectFile::ObjectFile(const lldb::ModuleSP &module_sp, m_type(eTypeInvalid), m_strata(eStrataInvalid), m_file_offset(file_offset), m_length(length), m_data(), m_process_wp(), m_memory_addr(LLDB_INVALID_ADDRESS), m_sections_up(), m_symtab_up(), - m_synthetic_symbol_idx(0) { + m_symtab_once_up(new llvm::once_flag()) { if (file_spec_ptr) m_file = *file_spec_ptr; if (data_sp) @@ -265,7 +265,7 @@ ObjectFile::ObjectFile(const lldb::ModuleSP &module_sp, : ModuleChild(module_sp), m_file(), m_type(eTypeInvalid), m_strata(eStrataInvalid), m_file_offset(0), m_length(0), m_data(), m_process_wp(process_sp), m_memory_addr(header_addr), m_sections_up(), - m_symtab_up(), m_synthetic_symbol_idx(0) { + m_symtab_up(), m_symtab_once_up(new llvm::once_flag()) { if (header_data_sp) m_data.SetData(header_data_sp, 0, header_data_sp->GetByteSize()); Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); @@ -571,11 +571,13 @@ bool ObjectFile::SplitArchivePathWithObject(llvm::StringRef path_with_object, void ObjectFile::ClearSymtab() { ModuleSP module_sp(GetModule()); if (module_sp) { - std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT)); LLDB_LOGF(log, "%p ObjectFile::ClearSymtab () symtab = %p", static_cast<void *>(this), static_cast<void *>(m_symtab_up.get())); + // Since we need to clear the symbol table, we need a new llvm::once_flag + // instance so we can safely create another symbol table + m_symtab_once_up.reset(new llvm::once_flag()); m_symtab_up.reset(); } } @@ -715,3 +717,33 @@ void llvm::format_provider<ObjectFile::Strata>::format( break; } } + + +Symtab *ObjectFile::GetSymtab() { + ModuleSP module_sp(GetModule()); + if (module_sp) { + // We can't take the module lock in ObjectFile::GetSymtab() or we can + // deadlock in DWARF indexing when any file asks for the symbol table from + // an object file. 
This currently happens in the preloading of symbols in + // SymbolFileDWARF::PreloadSymbols() because the main thread will take the + // module lock, and then threads will be spun up to index the DWARF and + // any of those threads might end up trying to relocate items in the DWARF + // sections which causes ObjectFile::GetSectionData(...) to relocate section + // data which requires the symbol table. + // + // So to work around this, we create the symbol table one time using + // llvm::once_flag, lock it, and then set the unique pointer. Any other + // thread that gets ahold of the symbol table before parsing is done, will + // not be able to access the symbol table contents since all APIs in Symtab + // are protected by a mutex in the Symtab object itself. + llvm::call_once(*m_symtab_once_up, [&]() { + ElapsedTime elapsed(module_sp->GetSymtabParseTime()); + Symtab *symtab = new Symtab(this); + std::lock_guard<std::recursive_mutex> symtab_guard(symtab->GetMutex()); + m_symtab_up.reset(symtab); + ParseSymtab(*m_symtab_up); + m_symtab_up->Finalize(); + }); + } + return m_symtab_up.get(); +} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 19c1fee2bb38..c67955523bfb 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -997,10 +997,15 @@ void Symtab::InitAddressIndexes() { } } -void Symtab::CalculateSymbolSizes() { +void Symtab::Finalize() { std::lock_guard<std::recursive_mutex> guard(m_mutex); - // Size computation happens inside InitAddressIndexes. + // Calculate the size of symbols inside InitAddressIndexes. InitAddressIndexes(); + // Shrink to fit the symbols so we don't waste memory + if (m_symbols.capacity() > m_symbols.size()) { + collection new_symbols(m_symbols.begin(), m_symbols.end()); + m_symbols.swap(new_symbols); + } } Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) { diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index bd455310f08e..af5ca0225169 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1222,22 +1222,6 @@ Platform::CreateArchList(llvm::ArrayRef<llvm::Triple::ArchType> archs, return list; } -bool Platform::GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) { - const auto &archs = GetSupportedArchitectures(); - if (idx >= archs.size()) - return false; - arch = archs[idx]; - return true; -} - -std::vector<ArchSpec> Platform::GetSupportedArchitectures() { - std::vector<ArchSpec> result; - ArchSpec arch; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(idx, arch); ++idx) - result.push_back(arch); - return result; -} - /// Lets a platform answer if it is compatible with a given /// architecture and the target triple contained within. 
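The ObjectFile::GetSymtab() body added above parses the symbol table lazily and exactly once per object file: the llvm::once_flag is held behind a unique pointer so that ClearSymtab() can install a fresh flag and let the table be rebuilt, and the new Symtab's own mutex is locked while it is being filled in so that racing readers cannot see a half-built table. Below is a minimal standalone sketch of the same resettable call_once pattern; it uses std::once_flag rather than llvm::once_flag, and the ObjectFileSketch class is invented purely for illustration:

  #include <memory>
  #include <mutex>

  struct Symtab { /* parsed symbols would live here */ };

  class ObjectFileSketch {
  public:
    Symtab *GetSymtab() {
      // Every caller funnels through call_once; losers of the race block
      // until the winner has finished parsing and published m_symtab.
      std::call_once(*m_once, [&] { m_symtab = std::make_unique<Symtab>(); });
      return m_symtab.get();
    }

    void ClearSymtab() {
      // A once_flag cannot be re-armed, so swap in a brand-new one before
      // dropping the old table. Synchronization against concurrent
      // GetSymtab() callers is assumed to be handled externally here.
      m_once = std::make_unique<std::once_flag>();
      m_symtab.reset();
    }

  private:
    std::unique_ptr<std::once_flag> m_once = std::make_unique<std::once_flag>();
    std::unique_ptr<Symtab> m_symtab;
  };

The payoff of keeping the flag on the heap is that "parse once" and "allow re-parsing after ClearSymtab()" stop being contradictory requirements.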
bool Platform::IsCompatibleArchitecture(const ArchSpec &arch, @@ -1563,28 +1547,20 @@ Status Platform::GetCachedExecutable(ModuleSpec &module_spec, lldb::ModuleSP &module_sp, const FileSpecList *module_search_paths_ptr) { - const auto platform_spec = module_spec.GetFileSpec(); - const auto error = - LoadCachedExecutable(module_spec, module_sp, module_search_paths_ptr); - if (error.Success()) { - module_spec.GetFileSpec() = module_sp->GetFileSpec(); - module_spec.GetPlatformFileSpec() = platform_spec; - } - - return error; -} - -Status -Platform::LoadCachedExecutable(const ModuleSpec &module_spec, - lldb::ModuleSP &module_sp, - const FileSpecList *module_search_paths_ptr) { - return GetRemoteSharedModule( + FileSpec platform_spec = module_spec.GetFileSpec(); + Status error = GetRemoteSharedModule( module_spec, nullptr, module_sp, [&](const ModuleSpec &spec) { return ResolveRemoteExecutable(spec, module_sp, module_search_paths_ptr); }, nullptr); + if (error.Success()) { + module_spec.GetFileSpec() = module_sp->GetFileSpec(); + module_spec.GetPlatformFileSpec() = platform_spec; + } + + return error; } Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec, diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 84dc2b94a0eb..94f378886e50 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -5853,13 +5853,6 @@ Process::AdvanceAddressToNextBranchInstruction(Address default_stop_addr, return retval; } -Status Process::GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) { - if (auto abi = GetABI()) - load_addr = abi->FixDataAddress(load_addr); - return DoGetMemoryRegionInfo(load_addr, range_info); -} - Status Process::GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) { diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp b/lldb/source/Target/RemoteAwarePlatform.cpp index eb39fc6db304..b92d4d5fcaa7 100644 --- a/lldb/source/Target/RemoteAwarePlatform.cpp +++ b/lldb/source/Target/RemoteAwarePlatform.cpp @@ -131,9 +131,9 @@ Status RemoteAwarePlatform::ResolveExecutable( // architectures that we should be using (in the correct order) and see // if we can find a match that way StreamString arch_names; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex( - idx, resolved_module_spec.GetArchitecture()); - ++idx) { + llvm::ListSeparator LS; + for (const ArchSpec &arch : GetSupportedArchitectures()) { + resolved_module_spec.GetArchitecture() = arch; error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, module_search_paths_ptr, nullptr, nullptr); // Did we find an executable using one of the @@ -144,10 +144,7 @@ Status RemoteAwarePlatform::ResolveExecutable( error.SetErrorToGenericError(); } - if (idx > 0) - arch_names.PutCString(", "); - arch_names.PutCString( - resolved_module_spec.GetArchitecture().GetArchitectureName()); + arch_names << LS << arch.GetArchitectureName(); } if (error.Fail() || !exe_module_sp) { diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index a51c124f9615..977cc306bb4e 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -44,14 +43,6 @@ #include <cstring> #include <fcntl.h> -// Includes for pipe() -#if defined(_WIN32) -#include <fcntl.h> -#include 
<io.h> -#else -#include <unistd.h> -#endif - #if !defined(__APPLE__) #include "llvm/Support/DataTypes.h" #endif @@ -421,60 +412,6 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) { return error; } -static inline int OpenPipe(int fds[2], std::size_t size) { -#ifdef _WIN32 - return _pipe(fds, size, O_BINARY); -#else - (void)size; - return pipe(fds); -#endif -} - -static ::FILE *PrepareCommandsForSourcing(const char *commands_data, - size_t commands_size) { - enum PIPES { READ, WRITE }; // Indexes for the read and write fds - int fds[2] = {-1, -1}; - - if (OpenPipe(fds, commands_size) != 0) { - WithColor::error() - << "can't create pipe file descriptors for LLDB commands\n"; - return nullptr; - } - - ssize_t nrwr = write(fds[WRITE], commands_data, commands_size); - if (size_t(nrwr) != commands_size) { - WithColor::error() - << format( - "write(%i, %p, %" PRIu64 - ") failed (errno = %i) when trying to open LLDB commands pipe", - fds[WRITE], static_cast<const void *>(commands_data), - static_cast<uint64_t>(commands_size), errno) - << '\n'; - llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]); - llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]); - return nullptr; - } - - // Close the write end of the pipe, so that the command interpreter will exit - // when it consumes all the data. - llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]); - - // Open the read file descriptor as a FILE * that we can return as an input - // handle. - ::FILE *commands_file = fdopen(fds[READ], "rb"); - if (commands_file == nullptr) { - WithColor::error() << format("fdopen(%i, \"rb\") failed (errno = %i) " - "when trying to open LLDB commands pipe", - fds[READ], errno) - << '\n'; - llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]); - return nullptr; - } - - // 'commands_file' now owns the read descriptor. - return commands_file; -} - std::string EscapeString(std::string arg) { std::string::size_type pos = 0; while ((pos = arg.find_first_of("\"\\", pos)) != std::string::npos) { @@ -604,21 +541,15 @@ int Driver::MainLoop() { // Check if we have any data in the commands stream, and if so, save it to a // temp file // so we can then run the command interpreter using the file contents. - const char *commands_data = commands_stream.GetData(); - const size_t commands_size = commands_stream.GetSize(); - bool go_interactive = true; - if ((commands_data != nullptr) && (commands_size != 0u)) { - FILE *commands_file = - PrepareCommandsForSourcing(commands_data, commands_size); - - if (commands_file == nullptr) { - // We should have already printed an error in PrepareCommandsForSourcing. + if ((commands_stream.GetData() != nullptr) && + (commands_stream.GetSize() != 0u)) { + SBError error = m_debugger.SetInputString(commands_stream.GetData()); + if (error.Fail()) { + WithColor::error() << error.GetCString() << '\n'; return 1; } - m_debugger.SetInputFileHandle(commands_file, true); - // Set the debugger into Sync mode when running the command file. Otherwise // command files that run the target won't run in a sensible way. 
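Editor's note: the Driver.cpp hunk above drops the hand-rolled pipe plumbing in favor of SBDebugger::SetInputString, which this import adds on the SB API side. A minimal sketch of the resulting pattern, assuming an existing SBDebugger and with error handling trimmed; the helper name is illustrative:

    #include "lldb/API/SBCommandInterpreterRunOptions.h"
    #include "lldb/API/SBDebugger.h"
    #include "lldb/API/SBError.h"

    void sourceCommands(lldb::SBDebugger &debugger, const char *commands) {
      lldb::SBError error = debugger.SetInputString(commands);
      if (error.Fail())
        return;
      // Run the command text synchronously, then restore the previous setting.
      bool old_async = debugger.GetAsync();
      debugger.SetAsync(false);
      lldb::SBCommandInterpreterRunOptions options;
      debugger.RunCommandInterpreter(options);
      debugger.SetAsync(old_async);
    }
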
bool old_async = m_debugger.GetAsync(); @@ -651,12 +582,9 @@ int Driver::MainLoop() { SBStream crash_commands_stream; WriteCommandsForSourcing(eCommandPlacementAfterCrash, crash_commands_stream); - const char *crash_commands_data = crash_commands_stream.GetData(); - const size_t crash_commands_size = crash_commands_stream.GetSize(); - commands_file = - PrepareCommandsForSourcing(crash_commands_data, crash_commands_size); - if (commands_file != nullptr) { - m_debugger.SetInputFileHandle(commands_file, true); + SBError error = + m_debugger.SetInputString(crash_commands_stream.GetData()); + if (error.Success()) { SBCommandInterpreterRunResult local_results = m_debugger.RunCommandInterpreter(options); if (local_results.GetResult() == diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp index d4b54362bb46..9e07f4c8debd 100644 --- a/lldb/tools/lldb-server/lldb-platform.cpp +++ b/lldb/tools/lldb-server/lldb-platform.cpp @@ -364,23 +364,17 @@ int main_platform(int argc, char *argv[]) { fprintf(stderr, "failed to start gdbserver: %s\n", error.AsCString()); } - // After we connected, we need to get an initial ack from... - if (platform.HandshakeWithClient()) { - bool interrupt = false; - bool done = false; - while (!interrupt && !done) { - if (platform.GetPacketAndSendResponse(llvm::None, error, interrupt, - done) != - GDBRemoteCommunication::PacketResult::Success) - break; - } - - if (error.Fail()) { - WithColor::error() << error.AsCString() << '\n'; - } - } else { - WithColor::error() << "handshake with client failed\n"; + bool interrupt = false; + bool done = false; + while (!interrupt && !done) { + if (platform.GetPacketAndSendResponse(llvm::None, error, interrupt, + done) != + GDBRemoteCommunication::PacketResult::Success) + break; } + + if (error.Fail()) + WithColor::error() << error.AsCString() << '\n'; } } while (g_server); diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index d170eff17951..f2183ff52bfb 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -2377,10 +2377,21 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit); * * @{ */ + +/** Deprecated: Use LLVMAddAlias2 instead. */ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name); /** + * Add a GlobalAlias with the given value type, address space and aliasee. + * + * @see llvm::GlobalAlias::create() + */ +LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy, + unsigned AddrSpace, LLVMValueRef Aliasee, + const char *Name); + +/** * Obtain a GlobalAlias value from a Module by its name. * * The returned value corresponds to a llvm::GlobalAlias value. diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 595cd94b6b8f..c2660502a419 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1458,10 +1458,8 @@ public: /// uint64_t. The bitwidth must be <= 64 or the value must fit within a /// uint64_t. Otherwise an assertion will result. 
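Editor's note: the LLVMAddAlias2 entry point declared above takes the alias value type and address space explicitly instead of deriving them from the aliasee's pointer type. A small sketch of calling it through the C API; the module and symbol names are illustrative:

    #include "llvm-c/Core.h"

    void makeAlias(void) {
      LLVMModuleRef M = LLVMModuleCreateWithName("m");
      LLVMTypeRef I32 = LLVMInt32Type();
      LLVMValueRef G = LLVMAddGlobal(M, I32, "g");
      // Value type and address space are now explicit parameters.
      LLVMValueRef A = LLVMAddAlias2(M, I32, /*AddrSpace=*/0, G, "g_alias");
      (void)A;
      LLVMDisposeModule(M);
    }
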
uint64_t getZExtValue() const { - if (isSingleWord()) { - assert(BitWidth && "zero width values not allowed"); + if (isSingleWord()) return U.VAL; - } assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); return U.pVal[0]; } diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h index 8a7c0a78a0fc..ad35e09f0f74 100644 --- a/llvm/include/llvm/ADT/SCCIterator.h +++ b/llvm/include/llvm/ADT/SCCIterator.h @@ -28,6 +28,10 @@ #include <cassert> #include <cstddef> #include <iterator> +#include <queue> +#include <set> +#include <unordered_map> +#include <unordered_set> #include <vector> namespace llvm { @@ -234,6 +238,135 @@ template <class T> scc_iterator<T> scc_end(const T &G) { return scc_iterator<T>::end(G); } +/// Sort the nodes of a directed SCC in the decreasing order of the edge +/// weights. The instantiating GraphT type should have weighted edge type +/// declared in its graph traits in order to use this iterator. +/// +/// This is implemented using Kruskal's minimal spanning tree algorithm followed +/// by a BFS walk. First a maximum spanning tree (forest) is built based on all +/// edges within the SCC collection. Then a BFS walk is initiated on tree nodes +/// that do not have a predecessor. Finally, the BFS order computed is the +/// traversal order of the nodes of the SCC. Such order ensures that +/// high-weighted edges are visited first during the tranversal. +template <class GraphT, class GT = GraphTraits<GraphT>> +class scc_member_iterator { + using NodeType = typename GT::NodeType; + using EdgeType = typename GT::EdgeType; + using NodesType = std::vector<NodeType *>; + + // Auxilary node information used during the MST calculation. + struct NodeInfo { + NodeInfo *Group = this; + uint32_t Rank = 0; + bool Visited = true; + }; + + // Find the root group of the node and compress the path from node to the + // root. + NodeInfo *find(NodeInfo *Node) { + if (Node->Group != Node) + Node->Group = find(Node->Group); + return Node->Group; + } + + // Union the source and target node into the same group and return true. + // Returns false if they are already in the same group. + bool unionGroups(const EdgeType *Edge) { + NodeInfo *G1 = find(&NodeInfoMap[Edge->Source]); + NodeInfo *G2 = find(&NodeInfoMap[Edge->Target]); + + // If the edge forms a cycle, do not add it to MST + if (G1 == G2) + return false; + + // Make the smaller rank tree a direct child or the root of high rank tree. + if (G1->Rank < G1->Rank) + G1->Group = G2; + else { + G2->Group = G1; + // If the ranks are the same, increment root of one tree by one. + if (G1->Rank == G2->Rank) + G2->Rank++; + } + return true; + } + + std::unordered_map<NodeType *, NodeInfo> NodeInfoMap; + NodesType Nodes; + +public: + scc_member_iterator(const NodesType &InputNodes); + + NodesType &operator*() { return Nodes; } +}; + +template <class GraphT, class GT> +scc_member_iterator<GraphT, GT>::scc_member_iterator( + const NodesType &InputNodes) { + if (InputNodes.size() <= 1) { + Nodes = InputNodes; + return; + } + + // Initialize auxilary node information. + NodeInfoMap.clear(); + for (auto *Node : InputNodes) { + // This is specifically used to construct a `NodeInfo` object in place. An + // insert operation will involve a copy construction which invalidate the + // initial value of the `Group` field which should be `this`. + (void)NodeInfoMap[Node].Group; + } + + // Sort edges by weights. 
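Editor's note: for reference, a standalone sketch of the union-by-rank and path-compression steps that the scc_member_iterator above uses while building its maximum spanning tree. The names mirror the NodeInfo helpers, but this is illustrative code, not the in-tree implementation:

    #include <utility>

    struct NodeInfo {
      NodeInfo *Group = this; // Each node starts as its own group root.
      unsigned Rank = 0;
    };

    static NodeInfo *find(NodeInfo *N) {
      if (N->Group != N)
        N->Group = find(N->Group); // Path compression.
      return N->Group;
    }

    static bool unionGroups(NodeInfo *A, NodeInfo *B) {
      NodeInfo *RA = find(A), *RB = find(B);
      if (RA == RB)
        return false; // The edge would close a cycle; skip it in the MST.
      if (RA->Rank < RB->Rank) // Union by rank: hang the lower-rank root
        std::swap(RA, RB);     // under the higher-rank root.
      RB->Group = RA;
      if (RA->Rank == RB->Rank)
        ++RA->Rank;
      return true;
    }
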
+ struct EdgeComparer { + bool operator()(const EdgeType *L, const EdgeType *R) const { + return L->Weight > R->Weight; + } + }; + + std::multiset<const EdgeType *, EdgeComparer> SortedEdges; + for (auto *Node : InputNodes) { + for (auto &Edge : Node->Edges) { + if (NodeInfoMap.count(Edge.Target)) + SortedEdges.insert(&Edge); + } + } + + // Traverse all the edges and compute the Maximum Weight Spanning Tree + // using Kruskal's algorithm. + std::unordered_set<const EdgeType *> MSTEdges; + for (auto *Edge : SortedEdges) { + if (unionGroups(Edge)) + MSTEdges.insert(Edge); + } + + // Do BFS on MST, starting from nodes that have no incoming edge. These nodes + // are "roots" of the MST forest. This ensures that nodes are visited before + // their decsendents are, thus ensures hot edges are processed before cold + // edges, based on how MST is computed. + for (const auto *Edge : MSTEdges) + NodeInfoMap[Edge->Target].Visited = false; + + std::queue<NodeType *> Queue; + for (auto &Node : NodeInfoMap) + if (Node.second.Visited) + Queue.push(Node.first); + + while (!Queue.empty()) { + auto *Node = Queue.front(); + Queue.pop(); + Nodes.push_back(Node); + for (auto &Edge : Node->Edges) { + if (MSTEdges.count(&Edge) && !NodeInfoMap[Edge.Target].Visited) { + NodeInfoMap[Edge.Target].Visited = true; + Queue.push(Edge.Target); + } + } + } + + assert(InputNodes.size() == Nodes.size() && "missing nodes in MST"); + std::reverse(Nodes.begin(), Nodes.end()); +} } // end namespace llvm #endif // LLVM_ADT_SCCITERATOR_H diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 48f15b02283a..f9b658ca960a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1016,20 +1016,39 @@ public: private: std::tuple<RangeTs...> Ranges; - template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) { + template <size_t... Ns> + iterator begin_impl(std::index_sequence<Ns...>) { + return iterator(std::get<Ns>(Ranges)...); + } + template <size_t... Ns> + iterator begin_impl(std::index_sequence<Ns...>) const { return iterator(std::get<Ns>(Ranges)...); } template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) { return iterator(make_range(std::end(std::get<Ns>(Ranges)), std::end(std::get<Ns>(Ranges)))...); } + template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const { + return iterator(make_range(std::end(std::get<Ns>(Ranges)), + std::end(std::get<Ns>(Ranges)))...); + } public: concat_range(RangeTs &&... Ranges) : Ranges(std::forward<RangeTs>(Ranges)...) 
{} - iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); } - iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); } + iterator begin() { + return begin_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator begin() const { + return begin_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator end() { + return end_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator end() const { + return end_impl(std::index_sequence_for<RangeTs...>{}); + } }; } // end namespace detail @@ -1977,10 +1996,16 @@ public: enumerator_iter<R> begin() { return enumerator_iter<R>(0, std::begin(TheRange)); } + enumerator_iter<R> begin() const { + return enumerator_iter<R>(0, std::begin(TheRange)); + } enumerator_iter<R> end() { return enumerator_iter<R>(std::end(TheRange)); } + enumerator_iter<R> end() const { + return enumerator_iter<R>(std::end(TheRange)); + } private: R TheRange; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index c26dbc457949..ea4c0312e073 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" @@ -50,6 +51,7 @@ enum class RecurKind { FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). FMax, ///< FP max implemented in terms of select(cmp()). + FMulAdd, ///< Fused multiply-add of floats (a * b + c). SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop ///< invariant SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop @@ -260,6 +262,12 @@ public: SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi, Loop *L) const; + /// Returns true if the instruction is a call to the llvm.fmuladd intrinsic. + static bool isFMulAddIntrinsic(Instruction *I) { + return isa<IntrinsicInst>(I) && + cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd; + } + private: // The starting value of the recurrence. // It does not have to be zero! diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 6eb637e72782..4ceae2d29f16 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -6,11 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file defines lint interfaces that can be used for some sanity checking -// of input to the system, and for checking that transformations -// haven't done something bad. In contrast to the Verifier, the Lint checker -// checks for undefined behavior or constructions with likely unintended -// behavior. +// This file defines lint interfaces that can be used for some validation of +// input to the system, and for checking that transformations haven't done +// something bad. In contrast to the Verifier, the Lint checker checks for +// undefined behavior or constructions with likely unintended behavior. // // To see what specifically is checked, look at Lint.cpp // diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index a2260688e3d6..df50611832ce 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1378,6 +1378,8 @@ private: /// includes an exact count and a maximum count. 
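Editor's note: the RecurKind::FMulAdd reduction support above keys off calls to the llvm.fmuladd intrinsic. A short sketch of creating and recognizing such a call with IRBuilder, assuming float values A, B2 and C are already in scope; names are illustrative:

    #include <cassert>
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicInst.h"

    llvm::Value *emitFMulAdd(llvm::IRBuilder<> &Builder, llvm::Value *A,
                             llvm::Value *B2, llvm::Value *C) {
      // fmuladd is overloaded on a single type shared by all operands.
      llvm::Value *V = Builder.CreateIntrinsic(llvm::Intrinsic::fmuladd,
                                               {A->getType()}, {A, B2, C});
      // The new isFMulAddIntrinsic helper reduces to a check like this:
      assert(llvm::cast<llvm::IntrinsicInst>(V)->getIntrinsicID() ==
             llvm::Intrinsic::fmuladd);
      return V;
    }
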
/// class BackedgeTakenInfo { + friend class ScalarEvolution; + /// A list of computable exits and their not-taken counts. Loops almost /// never have more than one computable exit. SmallVector<ExitNotTakenInfo, 1> ExitNotTaken; @@ -1398,9 +1400,6 @@ private: /// True iff the backedge is taken either exactly Max or zero times. bool MaxOrZero = false; - /// SCEV expressions used in any of the ExitNotTakenInfo counts. - SmallPtrSet<const SCEV *, 4> Operands; - bool isComplete() const { return IsComplete; } const SCEV *getConstantMax() const { return ConstantMax; } @@ -1466,10 +1465,6 @@ private: /// Return true if the number of times this backedge is taken is either the /// value returned by getConstantMax or zero. bool isConstantMaxOrZero(ScalarEvolution *SE) const; - - /// Return true if any backedge taken count expressions refer to the given - /// subexpression. - bool hasOperand(const SCEV *S) const; }; /// Cache the backedge-taken count of the loops for this function as they @@ -1480,6 +1475,10 @@ private: /// function as they are computed. DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts; + /// Loops whose backedge taken counts directly use this non-constant SCEV. + DenseMap<const SCEV *, SmallPtrSet<PointerIntPair<const Loop *, 1, bool>, 4>> + BECountUsers; + /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid /// potentially expensive recomputation of these properties. An instruction @@ -1492,6 +1491,11 @@ private: DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>> ValuesAtScopes; + /// Reverse map for invalidation purposes: Stores of which SCEV and which + /// loop this is the value-at-scope of. + DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>> + ValuesAtScopesUsers; + /// Memoized computeLoopDisposition results. DenseMap<const SCEV *, SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>> @@ -1616,11 +1620,6 @@ private: /// SCEV+Loop pair. const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); - /// This looks up computed SCEV values for all instructions that depend on - /// the given instruction and removes them from the ValueExprMap map if they - /// reference SymName. This is used during PHI resolution. - void forgetSymbolicName(Instruction *I, const SCEV *SymName); - /// Return the BackedgeTakenInfo for the given loop, lazily computing new /// values if the loop hasn't been analyzed yet. The returned result is /// guaranteed not to be predicated. @@ -1911,6 +1910,9 @@ private: bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); + /// Forget predicated/non-predicated backedge taken counts for the given loop. + void forgetBackedgeTakenCounts(const Loop *L, bool Predicated); + /// Drop memoized information for all \p SCEVs. void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs); @@ -1923,6 +1925,9 @@ private: /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); + /// Insert V to S mapping into ValueExprMap and ExprValueMap. + void insertValueToMap(Value *V, const SCEV *S); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. 
bool checkValidity(const SCEV *S) const; diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index ded53617b304..9c1abef33b28 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1048,6 +1048,12 @@ TLI_DEFINE_STRING_INTERNAL("memset") /// void memset_pattern16(void *b, const void *pattern16, size_t len); TLI_DEFINE_ENUM_INTERNAL(memset_pattern16) TLI_DEFINE_STRING_INTERNAL("memset_pattern16") +/// void memset_pattern4(void *b, const void *pattern4, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset_pattern4) +TLI_DEFINE_STRING_INTERNAL("memset_pattern4") +/// void memset_pattern8(void *b, const void *pattern8, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset_pattern8) +TLI_DEFINE_STRING_INTERNAL("memset_pattern8") /// int mkdir(const char *path, mode_t mode); TLI_DEFINE_ENUM_INTERNAL(mkdir) TLI_DEFINE_STRING_INTERNAL("mkdir") diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 24e2318de48b..751c88a4ecbb 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -115,7 +115,7 @@ struct VFShape { return {EC, Parameters}; } - /// Sanity check on the Parameters in the VFShape. + /// Validation check on the Parameters in the VFShape. bool hasValidParameterList() const; }; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index a270fd399aeb..c199e933116a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1602,6 +1602,13 @@ enum { NT_FREEBSD_PROCSTAT_AUXV = 16, }; +// NetBSD core note types. +enum { + NT_NETBSDCORE_PROCINFO = 1, + NT_NETBSDCORE_AUXV = 2, + NT_NETBSDCORE_LWPSTATUS = 24, +}; + // OpenBSD core note types. enum { NT_OPENBSD_PROCINFO = 10, diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index ed3cd54df272..73d39fecc268 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -130,6 +130,7 @@ bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); bool getValueTrackingVariableLocations(); +Optional<bool> getExplicitValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); @@ -170,6 +171,10 @@ void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F); /// Set function attributes of functions in Module M based on CPU, /// Features, and command line flags. void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M); + +/// Should value-tracking variable locations / instruction referencing be +/// enabled by default for this triple? +bool getDefaultValueTrackingVariableLocations(const llvm::Triple &T); } // namespace codegen } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index ff4ad4b72636..f3fa652b0175 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -564,6 +564,7 @@ public: /// This variant does not erase \p MI after calling the build function. 
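Editor's note: memset_pattern4 and memset_pattern8 join memset_pattern16 in TargetLibraryInfo; they are Darwin-only libc helpers that repeat a fixed-size pattern across a buffer. A small usage sketch, assuming an Apple target; the buffer and pattern are illustrative:

    #include <string.h> // Declares memset_pattern4/8/16 on Darwin.

    void fillWithPattern(unsigned *buf, size_t count) {
      const unsigned pattern = 0xDEADBEEFu;
      memset_pattern4(buf, &pattern, count * sizeof(unsigned));
    }
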
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo); bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); bool matchRotateOutOfRange(MachineInstr &MI); @@ -648,6 +649,54 @@ public: /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, + bool &HasFMAD, bool &Aggressive, + bool CanReassociate = false); + + /// Transform (fadd (fmul x, y), z) -> (fma x, y, z) + /// (fadd (fmul x, y), z) -> (fmad x, y, z) + bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + /// (fadd (fpext (fmul x, y)), z) -> (fmad (fpext x), (fpext y), z) + bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + /// (fadd (fmad x, y, (fmul u, v)), z) -> (fmad x, y, (fmad u, v, z)) + bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + // Transform (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + // (fadd (fmad x, y, (fpext (fmul u, v))), z) + // -> (fmad x, y, (fmad (fpext u), (fpext v), z)) + bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fmul x, y), z) -> (fma x, y, -z) + /// (fsub (fmul x, y), z) -> (fmad x, y, -z) + bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + /// (fsub (fneg (fmul, x, y)), z) -> (fmad (fneg x), y, (fneg z)) + bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fpext (fmul x, y)), z) + /// -> (fma (fpext x), (fpext y), (fneg z)) + /// (fsub (fpext (fmul x, y)), z) + /// -> (fmad (fpext x), (fpext y), (fneg z)) + bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fpext (fneg (fmul x, y))), z) + /// -> (fneg (fma (fpext x), (fpext y), z)) + /// (fsub (fpext (fneg (fmul x, y))), z) + /// -> (fneg (fmad (fpext x), (fpext y), z)) + bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index e813d030eec3..a41166bb4c6b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -129,6 +129,43 @@ inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { return SpecificConstantMatch(RequestedValue); } +/// Matcher for a specific constant splat. +struct SpecificConstantSplatMatch { + int64_t RequestedVal; + SpecificConstantSplatMatch(int64_t RequestedVal) + : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return isBuildVectorConstantSplat(Reg, MRI, RequestedVal, + /* AllowUndef */ false); + } +}; + +/// Matches a constant splat of \p RequestedValue. 
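Editor's note: the splat-aware matchers added here let GlobalISel combines treat a scalar constant and a G_BUILD_VECTOR splat of the same value uniformly. A sketch of how a combine might use them, assuming the usual MachineRegisterInfo is in scope; the function name is illustrative:

    #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

    using namespace llvm::MIPatternMatch;

    // Matches dst = G_ADD src, 0 where the zero may be a scalar G_CONSTANT or a
    // build-vector splat of zeros.
    static bool matchAddOfZero(llvm::MachineInstr &MI,
                               const llvm::MachineRegisterInfo &MRI,
                               llvm::Register &Src) {
      return mi_match(MI.getOperand(0).getReg(), MRI,
                      m_GAdd(m_Reg(Src), m_SpecificICstOrSplat(0)));
    }
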
+inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) { + return SpecificConstantSplatMatch(RequestedValue); +} + +/// Matcher for a specific constant or constant splat. +struct SpecificConstantOrSplatMatch { + int64_t RequestedVal; + SpecificConstantOrSplatMatch(int64_t RequestedVal) + : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + int64_t MatchedVal; + if (mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal) + return true; + return isBuildVectorConstantSplat(Reg, MRI, RequestedVal, + /* AllowUndef */ false); + } +}; + +/// Matches a \p RequestedValue constant or a constant splat of \p +/// RequestedValue. +inline SpecificConstantOrSplatMatch +m_SpecificICstOrSplat(int64_t RequestedValue) { + return SpecificConstantOrSplatMatch(RequestedValue); +} + ///{ /// Convenience matchers for specific integer values. inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); } @@ -489,6 +526,11 @@ inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) { return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src)); } +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT> m_GFSqrt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT>(Src); +} + // General helper for generic MI compares, i.e. G_ICMP and G_FCMP // TODO: Allow checking a specific predicate. template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode> diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 86545b976b8d..4126e2ac7b8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -378,6 +378,18 @@ Optional<FPValueAndVReg> getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef = true); +/// Return true if the specified register is defined by G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef. +bool isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef); + +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef. +bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef); + /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef. bool isBuildVectorAllZeros(const MachineInstr &MI, diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index dcbd19ac6b5a..ec23dde0c6c0 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -938,7 +938,8 @@ public: int64_t Offset, LLT Ty); MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size)); + return getMachineMemOperand( + MMO, Offset, Size == ~UINT64_C(0) ? 
LLT() : LLT::scalar(8 * Size)); } /// getMachineMemOperand - Allocate a new MachineMemOperand by copying diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index fa22ca6a98ac..a855a0797723 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -139,10 +139,13 @@ public: /// int getOffsetOfLocalArea() const { return LocalAreaOffset; } - /// isFPCloseToIncomingSP - Return true if the frame pointer is close to - /// the incoming stack pointer, false if it is close to the post-prologue - /// stack pointer. - virtual bool isFPCloseToIncomingSP() const { return true; } + /// Control the placement of special register scavenging spill slots when + /// allocating a stack frame. + /// + /// If this returns true, the frame indexes used by the RegScavenger will be + /// allocated closest to the incoming stack pointer. + virtual bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const; /// assignCalleeSavedSpillSlots - Allows target to override spill slot /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should @@ -220,6 +223,9 @@ public: virtual void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {} + /// Does the stack probe function call return with a modified stack pointer? + virtual bool stackProbeFunctionModifiesSP() const { return false; } + /// Adjust the prologue to have the function use segmented stacks. This works /// by adding a check even before the "normal" function prologue. virtual void adjustForSegmentedStacks(MachineFunction &MF, diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 8bc730a3eda5..d43dd9fac85d 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1913,6 +1913,12 @@ public: "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } + /// Optional target hook to create the LLVM IR attributes for the outlined + /// function. If overridden, the overriding function must call the default + /// implementation. + virtual void mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const; + /// Returns how or if \p MI should be outlined. virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 87f5168ec48f..d862701c37d7 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -425,6 +425,12 @@ public: return true; } + /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded + /// using generic code in SelectionDAGBuilder. + virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { + return true; + } + /// Return true if it is profitable to convert a select of FP constants into /// a constant pool load whose address depends on the select condition. The /// parameter may be used to differentiate a select with FP compare from @@ -806,9 +812,12 @@ public: /// Return true if target always benefits from combining into FMA for a /// given value type. This must typically return false on targets where FMA /// takes more cycles to execute than FADD. 
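Editor's note: the new shouldExpandGetActiveLaneMask hook above lets a target keep @llvm.get.active.lane.mask as a native operation instead of taking the generic SelectionDAGBuilder expansion. A sketch of an override for a hypothetical MyTargetLowering; the 128-bit condition is made up for illustration:

    // Keep the intrinsic for 128-bit fixed-length vectors, expand it generically
    // for everything else.
    bool MyTargetLowering::shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
      return !(VT.isFixedLengthVector() && VT.getFixedSizeInBits() == 128);
    }
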
- virtual bool enableAggressiveFMAFusion(EVT VT) const { - return false; - } + virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } + + /// Return true if target always benefits from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } /// Return the ValueType of the result of SETCC operations. virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, @@ -2710,6 +2719,14 @@ public: /// Return true if an fpext operation input to an \p Opcode operation is free /// (for instance, because half-precision floating-point numbers are /// implicitly extended to float-precision) for an FMA instruction. + virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, + LLT DestTy, LLT SrcTy) const { + return false; + } + + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const { assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && @@ -2748,11 +2765,47 @@ public: return false; } + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + /// + /// NOTE: This may be called before legalization on types for which FMAs are + /// not legal, but should return true if those types will eventually legalize + /// to types that support FMAs. After legalization, it will only be called on + /// types that support FMAs (via Legal or Custom actions) + virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + LLT) const { + return false; + } + /// IR version virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { return false; } + /// Returns true if \p MI can be combined with another instruction to + /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD, + /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be + /// distributed into an fadd/fsub. + virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const { + assert((MI.getOpcode() == TargetOpcode::G_FADD || + MI.getOpcode() == TargetOpcode::G_FSUB || + MI.getOpcode() == TargetOpcode::G_FMUL) && + "unexpected node in FMAD forming combine"); + switch (Ty.getScalarSizeInBits()) { + case 16: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16); + case 32: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32); + case 64: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64); + default: + break; + } + + return false; + } + /// Returns true if be combined with to form an ISD::FMAD. \p N may be an /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an /// fadd/fsub. @@ -2852,6 +2905,12 @@ public: /// passed to the fp16 to fp conversion library function. virtual bool shouldKeepZExtForFP16Conv() const { return false; } + /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT + /// from min(max(fptoi)) saturation patterns. 
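Editor's note: the shouldConvertFpToSat hook below gates forming the saturating conversion intrinsics from min/max-clamped fptosi/fptoui patterns. For reference, a sketch of emitting that target form directly with IRBuilder, assuming a float value X; names are illustrative:

    #include "llvm/IR/IRBuilder.h"

    llvm::Value *emitSaturatingFpToSi(llvm::IRBuilder<> &B, llvm::Value *X) {
      // @llvm.fptosi.sat is overloaded on both the result and the source type.
      return B.CreateIntrinsic(llvm::Intrinsic::fptosi_sat,
                               {B.getInt32Ty(), X->getType()}, {X});
    }
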
+ virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const { + return isOperationLegalOrCustom(Op, VT); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 902973ff5722..ae1afeb668be 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -156,6 +156,11 @@ public: NormalUnits.getNumInfoUnits()); } + const DWARFUnitVector &getNormalUnitsVector() { + parseNormalUnits(); + return NormalUnits; + } + /// Get units from .debug_types in this context. unit_iterator_range types_section_units() { parseNormalUnits(); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index d471b80c7fe1..505686bfbf59 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include <cstdint> #include <map> #include <set> @@ -153,8 +154,8 @@ private: /// \param SectionKind The object-file section kind that S comes from. /// /// \returns The number of errors that occurred during verification. - unsigned verifyUnitSection(const DWARFSection &S, - DWARFSectionKind SectionKind); + unsigned verifyUnitSection(const DWARFSection &S); + unsigned verifyUnits(const DWARFUnitVector &Units); /// Verifies that a call site entry is nested within a subprogram with a /// DW_AT_call attribute. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 362e8ab8e296..2180be3341e1 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -519,6 +519,7 @@ private: /// symbols of an error. class MaterializationResponsibility { friend class ExecutionSession; + friend class JITDylib; public: MaterializationResponsibility(MaterializationResponsibility &&) = delete; @@ -535,10 +536,10 @@ public: /// Returns the target JITDylib that these symbols are being materialized /// into. - JITDylib &getTargetJITDylib() const { return *JD; } + JITDylib &getTargetJITDylib() const { return JD; } /// Returns the ExecutionSession for this instance. - ExecutionSession &getExecutionSession(); + ExecutionSession &getExecutionSession() const; /// Returns the symbol flags map for this responsibility instance. /// Note: The returned flags may have transient flags (Lazy, Materializing) @@ -640,15 +641,16 @@ public: private: /// Create a MaterializationResponsibility for the given JITDylib and /// initial symbols. 
- MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags, + MaterializationResponsibility(ResourceTrackerSP RT, + SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol) - : JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)), - InitSymbol(std::move(InitSymbol)) { - assert(this->JD && "Cannot initialize with null JITDylib"); + : JD(RT->getJITDylib()), RT(std::move(RT)), + SymbolFlags(std::move(SymbolFlags)), InitSymbol(std::move(InitSymbol)) { assert(!this->SymbolFlags.empty() && "Materializing nothing?"); } - JITDylibSP JD; + JITDylib &JD; + ResourceTrackerSP RT; SymbolFlagsMap SymbolFlags; SymbolStringPtr InitSymbol; }; @@ -913,12 +915,26 @@ public: const SymbolLookupSet &LookupSet) = 0; }; -/// A symbol table that supports asynchoronous symbol queries. +/// Represents a JIT'd dynamic library. +/// +/// This class aims to mimic the behavior of a regular dylib or shared object, +/// but without requiring the contained program representations to be compiled +/// up-front. The JITDylib's content is defined by adding MaterializationUnits, +/// and contained MaterializationUnits will typically rely on the JITDylib's +/// links-against order to resolve external references (similar to a regular +/// dylib). +/// +/// The JITDylib object is a thin wrapper that references state held by the +/// ExecutionSession. JITDylibs can be removed, clearing this underlying state +/// and leaving the JITDylib object in a defunct state. In this state the +/// JITDylib's name is guaranteed to remain accessible. If the ExecutionSession +/// is still alive then other operations are callable but will return an Error +/// or null result (depending on the API). It is illegal to call any operation +/// other than getName on a JITDylib after the ExecutionSession has been torn +/// down. /// -/// Represents a virtual shared object. Instances can not be copied or moved, so -/// their addresses may be used as keys for resource management. -/// JITDylib state changes must be made via an ExecutionSession to guarantee -/// that they are synchronized with respect to other JITDylib operations. +/// JITDylibs cannot be moved or copied. Their address is stable, and useful as +/// a key in some JIT data structures. class JITDylib : public ThreadSafeRefCountedBase<JITDylib>, public jitlink::JITLinkDylib { friend class AsynchronousSymbolQuery; @@ -931,10 +947,21 @@ public: JITDylib &operator=(const JITDylib &) = delete; JITDylib(JITDylib &&) = delete; JITDylib &operator=(JITDylib &&) = delete; + ~JITDylib(); /// Get a reference to the ExecutionSession for this JITDylib. + /// + /// It is legal to call this method on a defunct JITDylib, however the result + /// will only usable if the ExecutionSession is still alive. If this JITDylib + /// is held by an error that may have torn down the JIT then the result + /// should not be used. ExecutionSession &getExecutionSession() const { return ES; } + /// Dump current JITDylib state to OS. + /// + /// It is legal to call this method on a defunct JITDylib. + void dump(raw_ostream &OS); + /// Calls remove on all trackers currently associated with this JITDylib. /// Does not run static deinits. /// @@ -942,12 +969,21 @@ public: /// added concurrently while the clear is underway, and the newly added /// code will *not* be cleared. Adding new code concurrently with a clear /// is usually a bug and should be avoided. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. 
Error clear(); /// Get the default resource tracker for this JITDylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. ResourceTrackerSP getDefaultResourceTracker(); /// Create a resource tracker for this JITDylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. ResourceTrackerSP createResourceTracker(); /// Adds a definition generator to this JITDylib and returns a referenece to @@ -956,6 +992,9 @@ public: /// When JITDylibs are searched during lookup, if no existing definition of /// a symbol is found, then any generators that have been added are run (in /// the order that they were added) to potentially generate a definition. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename GeneratorT> GeneratorT &addGenerator(std::unique_ptr<GeneratorT> DefGenerator); @@ -963,6 +1002,9 @@ public: /// /// The given generator must exist in this JITDylib's generators list (i.e. /// have been added and not yet removed). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void removeGenerator(DefinitionGenerator &G); /// Set the link order to be used when fixing up definitions in JITDylib. @@ -983,26 +1025,41 @@ public: /// as the first in the link order (instead of this dylib) ensures that /// definitions within this dylib resolve to the lazy-compiling stubs, /// rather than immediately materializing the definitions in this dylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void setLinkOrder(JITDylibSearchOrder NewSearchOrder, bool LinkAgainstThisJITDylibFirst = true); /// Add the given JITDylib to the link order for definitions in this /// JITDylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Replace OldJD with NewJD in the link order if OldJD is present. /// Otherwise this operation is a no-op. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, JITDylibLookupFlags JDLookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Remove the given JITDylib from the link order for this JITDylib if it is /// present. Otherwise this operation is a no-op. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void removeFromLinkOrder(JITDylib &JD); /// Do something with the link order (run under the session lock). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename Func> auto withLinkOrderDo(Func &&F) -> decltype(F(std::declval<const JITDylibSearchOrder &>())); @@ -1014,6 +1071,9 @@ public: /// /// This overload always takes ownership of the MaterializationUnit. If any /// errors occur, the MaterializationUnit consumed. 
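Editor's note: definition generators, documented above for addGenerator, are the usual way to make host-process symbols visible to JIT'd code. A common sketch using LLJIT, assuming an instance J; error handling is kept minimal:

    #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
    #include "llvm/ExecutionEngine/Orc/LLJIT.h"

    llvm::Error addProcessSymbols(llvm::orc::LLJIT &J) {
      auto Gen = llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
          J.getDataLayout().getGlobalPrefix());
      if (!Gen)
        return Gen.takeError();
      J.getMainJITDylib().addGenerator(std::move(*Gen));
      return llvm::Error::success();
    }
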
+ /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename MaterializationUnitType> Error define(std::unique_ptr<MaterializationUnitType> &&MU, ResourceTrackerSP RT = nullptr); @@ -1025,6 +1085,9 @@ public: /// generated. If an error occurs, ownership remains with the caller. This /// may allow the caller to modify the MaterializationUnit to correct the /// issue, then re-call define. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename MaterializationUnitType> Error define(std::unique_ptr<MaterializationUnitType> &MU, ResourceTrackerSP RT = nullptr); @@ -1039,28 +1102,40 @@ public: /// /// On success, all symbols are removed. On failure, the JITDylib state is /// left unmodified (no symbols are removed). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. Error remove(const SymbolNameSet &Names); - /// Dump current JITDylib state to OS. - void dump(raw_ostream &OS); - /// Returns the given JITDylibs and all of their transitive dependencies in /// DFS order (based on linkage relationships). Each JITDylib will appear /// only once. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs); /// Returns the given JITDylibs and all of their transitive dependensies in /// reverse DFS order (based on linkage relationships). Each JITDylib will /// appear only once. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. static std::vector<JITDylibSP> getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs); /// Return this JITDylib and its transitive dependencies in DFS order /// based on linkage relationships. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. std::vector<JITDylibSP> getDFSLinkOrder(); /// Rteurn this JITDylib and its transitive dependencies in reverse DFS order /// based on linkage relationships. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. std::vector<JITDylibSP> getReverseDFSLinkOrder(); private: @@ -1151,7 +1226,6 @@ private: JITDylib(ExecutionSession &ES, std::string Name); - ResourceTrackerSP getTracker(MaterializationResponsibility &MR); std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>> removeTracker(ResourceTracker &RT); @@ -1197,8 +1271,8 @@ private: failSymbols(FailedSymbolsWorklist); ExecutionSession &ES; + enum { Open, Closing, Closed } State = Open; std::mutex GeneratorsMutex; - bool Open = true; SymbolTable Symbols; UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; @@ -1208,7 +1282,8 @@ private: // Map trackers to sets of symbols tracked. 
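Editor's note: the tracker bookkeeping below is what makes per-tracker removal work. A sketch of the client-side pattern with LLJIT, assuming an instance J and a ThreadSafeModule TSM; the function name is illustrative:

    #include "llvm/ExecutionEngine/Orc/LLJIT.h"
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"

    llvm::Error addAndRemove(llvm::orc::LLJIT &J, llvm::orc::ThreadSafeModule TSM) {
      auto RT = J.getMainJITDylib().createResourceTracker();
      if (llvm::Error Err = J.addIRModule(RT, std::move(TSM)))
        return Err;
      // ... use the JIT'd definitions ...
      // Removing the tracker frees everything that was added under it.
      return RT->remove();
    }
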
DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols; - DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers; + DenseMap<ResourceTracker *, DenseSet<MaterializationResponsibility *>> + TrackerMRs; }; /// Platforms set up standard symbols and mediate interactions between dynamic @@ -1363,6 +1438,18 @@ public: /// If no Platform is attached this call is equivalent to createBareJITDylib. Expected<JITDylib &> createJITDylib(std::string Name); + /// Closes the given JITDylib. + /// + /// This method clears all resources held for the JITDylib, puts it in the + /// closed state, and clears all references held by the ExecutionSession and + /// other JITDylibs. No further code can be added to the JITDylib, and the + /// object will be freed once any remaining JITDylibSPs to it are destroyed. + /// + /// This method does *not* run static destructors. + /// + /// This method can only be called once for each JITDylib. + Error removeJITDylib(JITDylib &JD); + /// Set the error reporter function. ExecutionSession &setErrorReporter(ErrorReporter ReportError) { this->ReportError = std::move(ReportError); @@ -1574,9 +1661,9 @@ private: SymbolStringPtr InitSymbol) { auto &JD = RT.getJITDylib(); std::unique_ptr<MaterializationResponsibility> MR( - new MaterializationResponsibility(&JD, std::move(Symbols), + new MaterializationResponsibility(&RT, std::move(Symbols), std::move(InitSymbol))); - JD.MRTrackers[MR.get()] = &RT; + JD.TrackerMRs[&RT].insert(MR.get()); return MR; } @@ -1660,18 +1747,17 @@ private: JITDispatchHandlers; }; -inline ExecutionSession &MaterializationResponsibility::getExecutionSession() { - return JD->getExecutionSession(); +inline ExecutionSession & +MaterializationResponsibility::getExecutionSession() const { + return JD.getExecutionSession(); } template <typename Func> Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const { - return JD->getExecutionSession().runSessionLocked([&]() -> Error { - auto I = JD->MRTrackers.find(this); - assert(I != JD->MRTrackers.end() && "No tracker for this MR"); - if (I->second->isDefunct()) - return make_error<ResourceTrackerDefunct>(I->second); - F(I->second->getKeyUnsafe()); + return JD.getExecutionSession().runSessionLocked([&]() -> Error { + if (RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(RT); + F(RT->getKeyUnsafe()); return Error::success(); }); } @@ -1679,14 +1765,17 @@ Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const { template <typename GeneratorT> GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) { auto &G = *DefGenerator; - std::lock_guard<std::mutex> Lock(GeneratorsMutex); - DefGenerators.push_back(std::move(DefGenerator)); + ES.runSessionLocked([&] { + assert(State == Open && "Cannot add generator to closed JITDylib"); + DefGenerators.push_back(std::move(DefGenerator)); + }); return G; } template <typename Func> auto JITDylib::withLinkOrderDo(Func &&F) -> decltype(F(std::declval<const JITDylibSearchOrder &>())) { + assert(State == Open && "Cannot use link order of closed JITDylib"); return ES.runSessionLocked([&]() { return F(LinkOrder); }); } @@ -1715,6 +1804,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU, }); return ES.runSessionLocked([&, this]() -> Error { + assert(State == Open && "JD is defunct"); + if (auto Err = defineImpl(*MU)) return Err; @@ -1756,6 +1847,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU, }); return ES.runSessionLocked([&, this]() -> Error { + assert(State == 
Open && "JD is defunct"); + if (auto Err = defineImpl(*MU)) return Err; @@ -1800,50 +1893,50 @@ private: // --------------------------------------------- inline MaterializationResponsibility::~MaterializationResponsibility() { - JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this); + getExecutionSession().OL_destroyMaterializationResponsibility(*this); } inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const { - return JD->getExecutionSession().OL_getRequestedSymbols(*this); + return getExecutionSession().OL_getRequestedSymbols(*this); } inline Error MaterializationResponsibility::notifyResolved( const SymbolMap &Symbols) { - return JD->getExecutionSession().OL_notifyResolved(*this, Symbols); + return getExecutionSession().OL_notifyResolved(*this, Symbols); } inline Error MaterializationResponsibility::notifyEmitted() { - return JD->getExecutionSession().OL_notifyEmitted(*this); + return getExecutionSession().OL_notifyEmitted(*this); } inline Error MaterializationResponsibility::defineMaterializing( SymbolFlagsMap SymbolFlags) { - return JD->getExecutionSession().OL_defineMaterializing( - *this, std::move(SymbolFlags)); + return getExecutionSession().OL_defineMaterializing(*this, + std::move(SymbolFlags)); } inline void MaterializationResponsibility::failMaterialization() { - JD->getExecutionSession().OL_notifyFailed(*this); + getExecutionSession().OL_notifyFailed(*this); } inline Error MaterializationResponsibility::replace( std::unique_ptr<MaterializationUnit> MU) { - return JD->getExecutionSession().OL_replace(*this, std::move(MU)); + return getExecutionSession().OL_replace(*this, std::move(MU)); } inline Expected<std::unique_ptr<MaterializationResponsibility>> MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) { - return JD->getExecutionSession().OL_delegate(*this, Symbols); + return getExecutionSession().OL_delegate(*this, Symbols); } inline void MaterializationResponsibility::addDependencies( const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) { - JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies); + getExecutionSession().OL_addDependencies(*this, Name, Dependencies); } inline void MaterializationResponsibility::addDependenciesForAll( const SymbolDependenceMap &Dependencies) { - JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies); + getExecutionSession().OL_addDependenciesForAll(*this, Dependencies); } } // End namespace orc diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 2fec3e7e4230..d2f9bac16e5a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -120,6 +120,10 @@ enum class OMPScheduleType { Runtime = 37, Auto = 38, // auto + StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) + GuidedSimd = 46, // guided with chunk adjustment + RuntimeSimd = 47, // runtime with chunk adjustment + ModifierMonotonic = (1 << 29), // Set if the monotonic schedule modifier was present ModifierNonmonotonic = diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index b4e099e4ec20..bcf52278ccbb 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1670,32 +1670,6 @@ public: return CreateAlignedLoad(Ty, Ptr, MaybeAlign(), isVolatile, Name); } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - const char *Name), - 
"Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name); - } - - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name); - } - - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - bool isVolatile, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile, - Name); - } - StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) { return CreateAlignedStore(Val, Ptr, MaybeAlign(), isVolatile); } @@ -1719,35 +1693,6 @@ public: return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile, *Align), Name); } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - const char *Name), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, Name); - } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, Name); - } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - bool isVolatile, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, isVolatile, Name); - } - StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile = false) { if (!Align) { @@ -1788,14 +1733,6 @@ public: return Insert(new AtomicRMWInst(Op, Ptr, Val, *Align, Ordering, SSID)); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateGEP(Ptr->getType()->getScalarType()->getPointerElementType(), - Ptr, IdxList, Name); - } - Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "") { if (auto *PC = dyn_cast<Constant>(Ptr)) { @@ -1810,15 +1747,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateInBoundsGEP( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - Name); - } - Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "") { if (auto *PC = dyn_cast<Constant>(Ptr)) { @@ -1849,15 +1777,6 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP1_32( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - 
} - Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name = "") { Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0); @@ -1914,15 +1833,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP1_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - } - Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name = "") { Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0); @@ -1933,15 +1843,6 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP1_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - } - Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name = "") { Value *Idxs[] = { @@ -1955,15 +1856,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP2_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Idx1, Name); - } - Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name = "") { Value *Idxs[] = { @@ -1977,28 +1869,11 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, - uint64_t Idx1, const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP2_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Idx1, Name); - } - Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name = "") { return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP2_32( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, 0, Idx, - Name); - } - /// Same as CreateGlobalString, but return a pointer with "i8*" type /// instead of a pointer to array of i8. /// diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 6d32a898b668..046e9b5e809e 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -975,15 +975,6 @@ public: NameStr, InsertAtEnd); } - LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds( - Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr = "", - Instruction *InsertBefore = nullptr), - "Use the version with explicit element type instead") { - return CreateInBounds( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - NameStr, InsertBefore); - } - /// Create an "inbounds" getelementptr. See the documentation for the /// "inbounds" flag in LangRef.html for details. 
static GetElementPtrInst * @@ -996,15 +987,6 @@ public: return GEP; } - LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds( - Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr, - BasicBlock *InsertAtEnd), - "Use the version with explicit element type instead") { - return CreateInBounds( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - NameStr, InsertAtEnd); - } - static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr, diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 8290342c0d51..b01fa10763b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -524,6 +524,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; + // BCD intrinsics. + def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic< + [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic< + [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; + def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic< + [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic< + [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; + // P10 Vector Extract with Mask def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; @@ -1073,6 +1087,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_crypto_vpermxor_be : + GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vshasigmad : GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">, diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index b83d83f0d0ab..7d232bba0864 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -250,8 +250,16 @@ public: bool operator!=(const FastMathFlags &OtherFlags) const { return Flags != OtherFlags.Flags; } + + /// Print fast-math flags to \p O. + void print(raw_ostream &O) const; }; +inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) { + FMF.print(O); + return O; +} + /// Utility class for floating point operations which can have /// information about relaxed accuracy requirements attached to them. 
class FPMathOperator : public Operator { diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index b858733530e3..320deb80bb1f 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -2285,6 +2285,31 @@ m_Not(const ValTy &V) { return m_c_Xor(V, m_AllOnes()); } +template <typename ValTy> struct NotForbidUndef_match { + ValTy Val; + NotForbidUndef_match(const ValTy &V) : Val(V) {} + + template <typename OpTy> bool match(OpTy *V) { + // We do not use m_c_Xor because that could match an arbitrary APInt that is + // not -1 as C and then fail to match the other operand if it is -1. + // This code should still work even when both operands are constants. + Value *X; + const APInt *C; + if (m_Xor(m_Value(X), m_APIntForbidUndef(C)).match(V) && C->isAllOnes()) + return Val.match(X); + if (m_Xor(m_APIntForbidUndef(C), m_Value(X)).match(V) && C->isAllOnes()) + return Val.match(X); + return false; + } +}; + +/// Matches a bitwise 'not' as 'xor V, -1' or 'xor -1, V'. For vectors, the +/// constant value must be composed of only -1 scalar elements. +template <typename ValTy> +inline NotForbidUndef_match<ValTy> m_NotForbidUndef(const ValTy &V) { + return NotForbidUndef_match<ValTy>(V); +} + /// Matches an SMin with LHS and RHS in either order. template <typename LHS, typename RHS> inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 47431adc6fac..c899c46d4055 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -368,6 +368,8 @@ public: Type *getPointerElementType() const { assert(getTypeID() == PointerTyID); + assert(NumContainedTys && + "Attempting to get element type of opaque pointer"); return ContainedTys[0]; } diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 361d6357b303..a3c6b4e70bf5 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -38,7 +38,7 @@ // is one VP intrinsic that maps directly to one SDNode that goes by the // same name. Since the operands are also the same, we open the property // scopes for both the VPIntrinsic and the SDNode at once. -// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p VPSD The SelectionDAG Node id (eg VP_ADD). // \p LEGALPOS The operand position of the SDNode that is used for legalizing // this SDNode. This can be `-1`, in which case the return type of // the SDNode is used. @@ -46,12 +46,12 @@ // \p MASKPOS The mask operand position. // \p EVLPOS The explicit vector length operand position. #ifndef BEGIN_REGISTER_VP_SDNODE -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) #endif // End the property scope of a new VP SDNode. #ifndef END_REGISTER_VP_SDNODE -#define END_REGISTER_VP_SDNODE(SDOPC) +#define END_REGISTER_VP_SDNODE(VPSD) #endif // Helper macros for the common "1:1 - Intrinsic : SDNode" case. @@ -60,22 +60,21 @@ // same name. Since the operands are also the same, we open the property // scopes for both the VPIntrinsic and the SDNode at once. // -// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the +// \p VPID The canonical name (eg `vp_add`, which at the same time is the // name of the intrinsic and the TableGen def of the SDNode). // \p MASKPOS The mask operand position. // \p EVLPOS The explicit vector length operand position. 
-// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p VPSD The SelectionDAG Node id (eg VP_ADD). // \p LEGALPOS The operand position of the SDNode that is used for legalizing // this SDNode. This can be `-1`, in which case the return type of // the SDNode is used. -#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \ -BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \ -BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS) - -#define END_REGISTER_VP(INTRIN, SDOPC) \ -END_REGISTER_VP_INTRINSIC(INTRIN) \ -END_REGISTER_VP_SDNODE(SDOPC) +#define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \ + BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \ + BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS) +#define END_REGISTER_VP(VPID, VPSD) \ + END_REGISTER_VP_INTRINSIC(VPID) \ + END_REGISTER_VP_SDNODE(VPSD) // The following macros attach properties to the scope they are placed in. This // assigns the property to the VP Intrinsic and/or SDNode that belongs to the @@ -84,9 +83,9 @@ END_REGISTER_VP_SDNODE(SDOPC) // Property Macros { // The intrinsic and/or SDNode has the same function as this LLVM IR Opcode. -// \p OPC The standard IR opcode. -#ifndef HANDLE_VP_TO_OPC -#define HANDLE_VP_TO_OPC(OPC) +// \p OPC The opcode of the instruction with the same function. +#ifndef VP_PROPERTY_FUNCTIONAL_OPC +#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) #endif // Whether the intrinsic may have a rounding mode or exception behavior operand @@ -96,34 +95,30 @@ END_REGISTER_VP_SDNODE(SDOPC) // \p HASEXCEPT '1' if the intrinsic can have an exception behavior operand // bundle, '0' otherwise. // \p INTRINID The constrained fp intrinsic this VP intrinsic corresponds to. -#ifndef HANDLE_VP_TO_CONSTRAINEDFP -#define HANDLE_VP_TO_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID) +#ifndef VP_PROPERTY_CONSTRAINEDFP +#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID) #endif // Map this VP intrinsic to its canonical functional intrinsic. -#ifndef HANDLE_VP_TO_INTRIN -#define HANDLE_VP_TO_INTRIN(ID) +// \p INTRIN The non-VP intrinsics with the same function. +#ifndef VP_PROPERTY_FUNCTIONAL_INTRINSIC +#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) #endif // This VP Intrinsic is a memory operation // The pointer arg is at POINTERPOS and the data arg is at DATAPOS. -#ifndef HANDLE_VP_IS_MEMOP -#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) +#ifndef VP_PROPERTY_MEMOP +#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) #endif // Map this VP reduction intrinsic to its reduction operand positions. -#ifndef HANDLE_VP_REDUCTION -#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS) +#ifndef VP_PROPERTY_REDUCTION +#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) #endif // A property to infer VP binary-op SDNode opcodes automatically. -#ifndef PROPERTY_VP_BINARYOP_SDNODE -#define PROPERTY_VP_BINARYOP_SDNODE(ID) -#endif - -// A property to infer VP reduction SDNode opcodes automatically. -#ifndef PROPERTY_VP_REDUCTION_SDNODE -#define PROPERTY_VP_REDUCTION_SDNODE(ID) +#ifndef VP_PROPERTY_BINARYOP +#define VP_PROPERTY_BINARYOP #endif /// } Property Macros @@ -132,15 +127,14 @@ END_REGISTER_VP_SDNODE(SDOPC) // Specialized helper macro for integer binary operators (%x, %y, %mask, %evl). #ifdef HELPER_REGISTER_BINARY_INT_VP -#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" 
#endif -#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \ -BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \ -HANDLE_VP_TO_OPC(OPC) \ -PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ -END_REGISTER_VP(INTRIN, SDOPC) - - +#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC) \ + BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_BINARYOP \ + END_REGISTER_VP(VPID, VPSD) // llvm.vp.add(x,y,mask,vlen) HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add) @@ -193,12 +187,12 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor) #error \ "The internal helper macro HELPER_REGISTER_BINARY_FP_VP is already defined!" #endif -#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, SDOPC, OPC) \ - BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \ - HANDLE_VP_TO_OPC(OPC) \ - HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \ - PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ - END_REGISTER_VP(vp_##OPSUFFIX, SDOPC) +#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC) \ + BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \ + VP_PROPERTY_BINARYOP \ + END_REGISTER_VP(vp_##OPSUFFIX, VPSD) // llvm.vp.fadd(x,y,mask,vlen) HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd) @@ -224,34 +218,34 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) // chain = VP_STORE chain,val,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5) -HANDLE_VP_TO_OPC(Store) -HANDLE_VP_TO_INTRIN(masked_store) -HANDLE_VP_IS_MEMOP(vp_store, 1, 0) +VP_PROPERTY_FUNCTIONAL_OPC(Store) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store) +VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_store, VP_STORE) // llvm.vp.scatter(ptr,val,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3) // chain = VP_SCATTER chain,val,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6) -HANDLE_VP_TO_INTRIN(masked_scatter) -HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter) +VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_scatter, VP_SCATTER) // llvm.vp.load(ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2) // val,chain = VP_LOAD chain,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4) -HANDLE_VP_TO_OPC(Load) -HANDLE_VP_TO_INTRIN(masked_load) -HANDLE_VP_IS_MEMOP(vp_load, 0, None) +VP_PROPERTY_FUNCTIONAL_OPC(Load) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load) +VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_load, VP_LOAD) // llvm.vp.gather(ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2) // val,chain = VP_GATHER chain,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5) -HANDLE_VP_TO_INTRIN(masked_gather) -HANDLE_VP_IS_MEMOP(vp_gather, 0, None) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather) +VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_gather, VP_GATHER) ///// } Memory Operations @@ -260,14 +254,14 @@ END_REGISTER_VP(vp_gather, VP_GATHER) // Specialized helper macro for VP reductions (%start, %x, %mask, %evl). #ifdef HELPER_REGISTER_REDUCTION_VP -#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!" 
#endif -#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \ -BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \ -HANDLE_VP_TO_INTRIN(INTRIN) \ -HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ -PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ -END_REGISTER_VP(VPINTRIN, SDOPC) +#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN) \ + BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP(VPID, VPSD) // llvm.vp.reduce.add(start,x,mask,vlen) HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD, @@ -320,19 +314,19 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN, // fast-math flags in the IR and as two distinct ISD opcodes in the // SelectionDAG. #ifdef HELPER_REGISTER_REDUCTION_SEQ_VP -#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" #endif -#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \ -BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \ -BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \ -END_REGISTER_VP_SDNODE(SDOPC) \ -BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \ -END_REGISTER_VP_SDNODE(SEQ_SDOPC) \ -HANDLE_VP_TO_INTRIN(INTRIN) \ -HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ -PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ -PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \ -END_REGISTER_VP_INTRINSIC(VPINTRIN) +#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \ + BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \ + BEGIN_REGISTER_VP_SDNODE(VPSD, -1, VPID, 2, 3) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP_SDNODE(VPSD) \ + BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP_SDNODE(SEQ_VPSD) \ + VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \ + END_REGISTER_VP_INTRINSIC(VPID) // llvm.vp.reduce.fadd(start,x,mask,vlen) HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD, @@ -356,8 +350,7 @@ BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3) // END_REGISTER_CASES(vp_select, VP_SELECT) END_REGISTER_VP_INTRINSIC(vp_select) -BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, - EXPERIMENTAL_VP_SPLICE, -1) +BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1) END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) ///// } Shuffles @@ -368,10 +361,9 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) #undef END_REGISTER_VP #undef END_REGISTER_VP_INTRINSIC #undef END_REGISTER_VP_SDNODE -#undef HANDLE_VP_TO_OPC -#undef HANDLE_VP_TO_CONSTRAINEDFP -#undef HANDLE_VP_TO_INTRIN -#undef HANDLE_VP_IS_MEMOP -#undef HANDLE_VP_REDUCTION -#undef PROPERTY_VP_BINARYOP_SDNODE -#undef PROPERTY_VP_REDUCTION_SDNODE +#undef VP_PROPERTY_BINARYOP +#undef VP_PROPERTY_CONSTRAINEDFP +#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC +#undef VP_PROPERTY_FUNCTIONAL_OPC +#undef VP_PROPERTY_MEMOP +#undef VP_PROPERTY_REDUCTION diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h index f4381d2ae4a9..52a4c7b4301f 100644 --- a/llvm/include/llvm/IR/Verifier.h +++ b/llvm/include/llvm/IR/Verifier.h @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines the function verifier interface, that can be used for some -// sanity checking of input to the system, and for checking that transformations -// haven't done something bad. 
+// This file defines the function verifier interface, that can be used for +// validation checking of input to the system, and for checking that +// transformations haven't done something bad. // // Note that this does not provide full 'java style' security and verifications, // instead it just tries to ensure that code is well formed. diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h index 2b0f391570cd..8c0ad2699b8d 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -264,9 +264,10 @@ public: // Update the ready queues. void dump() const; - // This routine performs a sanity check. This routine should only be called - // when we know that 'IR' is not in the scheduler's instruction queues. - void sanityCheck(const InstRef &IR) const { + // This routine performs a basic correctness check. This routine should only + // be called when we know that 'IR' is not in the scheduler's instruction + // queues. + void instructionCheck(const InstRef &IR) const { assert(!is_contained(WaitSet, IR) && "Already in the wait set!"); assert(!is_contained(ReadySet, IR) && "Already in the ready set!"); assert(!is_contained(IssuedSet, IR) && "Already executing!"); diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index ee89f4eac61f..38a7de3d6131 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -121,6 +121,7 @@ struct LinkEditData { MachOYAML::ExportEntry ExportTrie; std::vector<NListEntry> NameList; std::vector<StringRef> StringTable; + std::vector<yaml::Hex32> IndirectSymbols; bool isEmpty() const; }; diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc new file mode 100644 index 000000000000..d64227e4ba31 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -0,0 +1,61 @@ +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structure, signature, + * constant literals that are shared across profiling runtime library, + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * +\*===----------------------------------------------------------------------===*/ + + +#ifdef _MSC_VER +#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#else +#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. +#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. 
+#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. +PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + uint8_t BuildId[32]; +}); +} // namespace memprof +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h new file mode 100644 index 000000000000..45544927a86f --- /dev/null +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -0,0 +1,43 @@ +#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { +namespace memprof { + +class RawMemProfReader { +public: + RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + // Prints aggregate counts for each raw profile parsed from the DataBuffer. + void printSummaries(raw_ostream &OS) const; + + // Return true if the \p DataBuffer starts with magic bytes indicating it is + // a raw binary memprof profile. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + // Create a RawMemProfReader after sanity checking the contents of the file at + // \p Path. 
+ static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path); + +private: + std::unique_ptr<MemoryBuffer> DataBuffer; +}; + +} // namespace memprof +} // namespace llvm + +#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index b3cfb71601f1..48e82fa55a0f 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -181,7 +181,8 @@ AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM | AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | - AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16)) + AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_BF16)) AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 131a58412db6..15bb428f19bc 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -137,15 +137,6 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values); bool isX18ReservedByDefault(const Triple &TT); -struct ParsedBranchProtection { - StringRef Scope; - StringRef Key; - bool BranchTargetEnforcement; -}; - -bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, - StringRef &Err); - } // namespace AArch64 } // namespace llvm diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h index 5d12b7e08d58..b46a4d9f690f 100644 --- a/llvm/include/llvm/Support/ARMAttributeParser.h +++ b/llvm/include/llvm/Support/ARMAttributeParser.h @@ -67,6 +67,10 @@ class ARMAttributeParser : public ELFAttributeParser { Error DSP_extension(ARMBuildAttrs::AttrType tag); Error T2EE_use(ARMBuildAttrs::AttrType tag); Error Virtualization_use(ARMBuildAttrs::AttrType tag); + Error PAC_extension(ARMBuildAttrs::AttrType tag); + Error BTI_extension(ARMBuildAttrs::AttrType tag); + Error PACRET_use(ARMBuildAttrs::AttrType tag); + Error BTI_use(ARMBuildAttrs::AttrType tag); Error nodefaults(ARMBuildAttrs::AttrType tag); public: diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h index 37c37522fd26..b4405e7d4908 100644 --- a/llvm/include/llvm/Support/ARMBuildAttributes.h +++ b/llvm/include/llvm/Support/ARMBuildAttributes.h @@ -70,9 +70,13 @@ enum AttrType : unsigned { DIV_use = 44, DSP_extension = 46, MVE_arch = 48, + PAC_extension = 50, + BTI_extension = 52, also_compatible_with = 65, conformance = 67, Virtualization_use = 68, + BTI_use = 74, + PACRET_use = 76, /// Legacy Tags Section = 2, // deprecated (ABI r2.09) @@ -237,7 +241,25 @@ enum { // Tag_Virtualization_use, (=68), uleb128 AllowTZ = 1, AllowVirtualization = 2, - AllowTZVirtualization = 3 + AllowTZVirtualization = 3, + + // Tag_PAC_extension, (=50), uleb128 + DisallowPAC = 0, + AllowPACInNOPSpace = 1, + AllowPAC = 2, + + // Tag_BTI_extension, (=52), uleb128 + DisallowBTI = 0, + AllowBTIInNOPSpace = 1, + AllowBTI = 2, + + // Tag_BTI_use, (=74), uleb128 + BTINotUsed = 0, + BTIUsed = 1, + + // Tag_PACRET_use, (=76), uleb128 + PACRETNotUsed = 0, + PACRETUsed = 1 }; } // namespace ARMBuildAttrs 
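The ARMBuildAttributes.h hunk above adds build-attribute tags for pointer authentication and branch target identification: Tag_PAC_extension (50), Tag_BTI_extension (52), Tag_BTI_use (74) and Tag_PACRET_use (76), each with the uleb128 values listed in the enum. As a rough, self-contained sketch of how a consumer of these attributes might turn a tag/value pair into a human-readable description (the decoder below is illustrative only, not the ARMAttributeParser implementation; its constants simply mirror the values in the hunk):

// Illustrative stand-alone decoder for the new PAC/BTI build attributes.
// Tag numbers and value meanings are taken from the ARMBuildAttrs hunk above;
// the helper itself is hypothetical.
#include <cstdint>
#include <iostream>
#include <string>

namespace {
constexpr unsigned TagPACExtension = 50;
constexpr unsigned TagBTIExtension = 52;
constexpr unsigned TagBTIUse = 74;
constexpr unsigned TagPACRETUse = 76;

std::string describe(unsigned Tag, uint64_t Value) {
  switch (Tag) {
  case TagPACExtension: // 0 = disallowed, 1 = NOP space only, 2 = allowed
    return Value == 0   ? "PAC disallowed"
           : Value == 1 ? "PAC allowed in NOP space"
                        : "PAC allowed";
  case TagBTIExtension: // 0 = disallowed, 1 = NOP space only, 2 = allowed
    return Value == 0   ? "BTI disallowed"
           : Value == 1 ? "BTI allowed in NOP space"
                        : "BTI allowed";
  case TagBTIUse:       // 0 = not used, 1 = used
    return Value ? "BTI used" : "BTI not used";
  case TagPACRETUse:    // 0 = not used, 1 = used
    return Value ? "PAC-RET used" : "PAC-RET not used";
  default:
    return "unknown tag";
  }
}
} // namespace

int main() {
  // Example: an object built with PAC restricted to the NOP space and BTI on.
  std::cout << describe(TagPACExtension, 1) << "\n"
            << describe(TagBTIExtension, 2) << "\n"
            << describe(TagBTIUse, 1) << "\n"
            << describe(TagPACRETUse, 0) << "\n";
}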
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index fd08f3e6960c..7d29808f0501 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -201,6 +201,7 @@ ARM_ARCH_EXT_NAME("cdecp4", ARM::AEK_CDECP4, "+cdecp4", "-cdecp4") ARM_ARCH_EXT_NAME("cdecp5", ARM::AEK_CDECP5, "+cdecp5", "-cdecp5") ARM_ARCH_EXT_NAME("cdecp6", ARM::AEK_CDECP6, "+cdecp6", "-cdecp6") ARM_ARCH_EXT_NAME("cdecp7", ARM::AEK_CDECP7, "+cdecp7", "-cdecp7") +ARM_ARCH_EXT_NAME("pacbti", ARM::AEK_PACBTI, "+pacbti", "-pacbti") #undef ARM_ARCH_EXT_NAME #ifndef ARM_HW_DIV_NAME diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h index b1ffcfb34552..b40704c24e87 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.h +++ b/llvm/include/llvm/Support/ARMTargetParser.h @@ -59,7 +59,7 @@ enum ArchExtKind : uint64_t { AEK_CDECP5 = 1 << 27, AEK_CDECP6 = 1 << 28, AEK_CDECP7 = 1 << 29, - + AEK_PACBTI = 1 << 30, // Unsupported extensions. AEK_OS = 1ULL << 59, AEK_IWMMXT = 1ULL << 60, diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index 21fd50763b1f..f39400c26eab 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -528,9 +528,9 @@ protected: /// of CFG edges must not delete the CFG nodes before calling this function. /// /// The applyUpdates function can reorder the updates and remove redundant - /// ones internally. The batch updater is also able to detect sequences of - /// zero and exactly one update -- it's optimized to do less work in these - /// cases. + /// ones internally (as long as it is done in a deterministic fashion). The + /// batch updater is also able to detect sequences of zero and exactly one + /// update -- it's optimized to do less work in these cases. /// /// Note that for postdominators it automatically takes care of applying /// updates on reverse edges internally (so there's no need to swap the @@ -538,8 +538,8 @@ protected: /// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T> /// with the same template parameter T. /// - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. /// void applyUpdates(ArrayRef<UpdateType> Updates) { GraphDiff<NodePtr, IsPostDominator> PreViewCFG( @@ -547,9 +547,9 @@ protected: DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr); } - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. - /// \param PostViewUpdates An unordered sequence of update to perform in order + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. + /// \param PostViewUpdates An ordered sequence of update to perform in order /// to obtain a post-view of the CFG. The DT will be updated assuming the /// obtained PostViewCFG is the desired end state. 
void applyUpdates(ArrayRef<UpdateType> Updates, diff --git a/llvm/include/llvm/Support/HTTPClient.h b/llvm/include/llvm/Support/HTTPClient.h new file mode 100644 index 000000000000..3172610c2d8b --- /dev/null +++ b/llvm/include/llvm/Support/HTTPClient.h @@ -0,0 +1,113 @@ +//===-- llvm/Support/HTTPClient.h - HTTP client library ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declarations of the HTTPClient, HTTPMethod, +/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as +/// the HTTPResponseBuffer and HTTPRequest structs. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HTTP_CLIENT_H +#define LLVM_SUPPORT_HTTP_CLIENT_H + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { + +enum class HTTPMethod { GET }; + +/// A stateless description of an outbound HTTP request. +struct HTTPRequest { + SmallString<128> Url; + HTTPMethod Method = HTTPMethod::GET; + bool FollowRedirects = true; + HTTPRequest(StringRef Url); +}; + +bool operator==(const HTTPRequest &A, const HTTPRequest &B); + +/// A handler for state updates occurring while an HTTPRequest is performed. +/// Can trigger the client to abort the request by returning an Error from any +/// of its methods. +class HTTPResponseHandler { +public: + /// Processes one line of HTTP response headers. + virtual Error handleHeaderLine(StringRef HeaderLine) = 0; + + /// Processes an additional chunk of bytes of the HTTP response body. + virtual Error handleBodyChunk(StringRef BodyChunk) = 0; + + /// Processes the HTTP response status code. + virtual Error handleStatusCode(unsigned Code) = 0; + +protected: + ~HTTPResponseHandler(); +}; + +/// An HTTP response status code bundled with a buffer to store the body. +struct HTTPResponseBuffer { + unsigned Code = 0; + std::unique_ptr<WritableMemoryBuffer> Body; +}; + +/// A simple handler which writes returned data to an HTTPResponseBuffer. +/// Ignores all headers except the Content-Length, which it uses to +/// allocate an appropriately-sized Body buffer. +class BufferedHTTPResponseHandler final : public HTTPResponseHandler { + size_t Offset = 0; + +public: + /// Stores the data received from the HTTP server. + HTTPResponseBuffer ResponseBuffer; + + /// These callbacks store the body and status code in an HTTPResponseBuffer + /// allocated based on Content-Length. The Content-Length header must be + /// handled by handleHeaderLine before any calls to handleBodyChunk. + Error handleHeaderLine(StringRef HeaderLine) override; + Error handleBodyChunk(StringRef BodyChunk) override; + Error handleStatusCode(unsigned Code) override; +}; + +/// A reusable client that can perform HTTPRequests through a network socket. +class HTTPClient { +public: + HTTPClient(); + ~HTTPClient(); + + /// Returns true only if LLVM has been compiled with a working HTTPClient. + static bool isAvailable(); + + /// Must be called at the beginning of a program, while it is a single thread. + static void initialize(); + + /// Must be called at the end of a program, while it is a single thread. + static void cleanup(); + + /// Sets the timeout for the entire request, in milliseconds. 
A zero or + /// negative value means the request never times out. + void setTimeout(std::chrono::milliseconds Timeout); + + /// Performs the Request, passing response data to the Handler. Returns all + /// errors which occur during the request. Aborts if an error is returned by a + /// Handler method. + Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler); + + /// Performs the Request with the default BufferedHTTPResponseHandler, and + /// returns its HTTPResponseBuffer or an Error. + Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request); + + /// Performs an HTTPRequest with the default configuration to make a GET + /// request to the given Url. Returns an HTTPResponseBuffer or an Error. + Expected<HTTPResponseBuffer> get(StringRef Url); +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HTTP_CLIENT_H diff --git a/llvm/include/llvm/Support/Mutex.h b/llvm/include/llvm/Support/Mutex.h index 1d8a0d3c87cb..d73bb8ef1120 100644 --- a/llvm/include/llvm/Support/Mutex.h +++ b/llvm/include/llvm/Support/Mutex.h @@ -36,7 +36,7 @@ namespace llvm return true; } else { // Single-threaded debugging code. This would be racy in - // multithreaded mode, but provides not sanity checks in single + // multithreaded mode, but provides not basic checks in single // threaded mode. ++acquired; return true; @@ -49,7 +49,7 @@ namespace llvm return true; } else { // Single-threaded debugging code. This would be racy in - // multithreaded mode, but provides not sanity checks in single + // multithreaded mode, but provides not basic checks in single // threaded mode. assert(acquired && "Lock not acquired before release!"); --acquired; diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h index 150bc7dbbce1..33a5d3efffee 100644 --- a/llvm/include/llvm/Support/RWMutex.h +++ b/llvm/include/llvm/Support/RWMutex.h @@ -114,7 +114,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides not basic checks in single threaded mode. ++readers; return true; } @@ -126,7 +126,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides not basic checks in single threaded mode. assert(readers > 0 && "Reader lock not acquired before release!"); --readers; return true; @@ -139,7 +139,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides not basic checks in single threaded mode. assert(writers == 0 && "Writer lock already acquired!"); ++writers; return true; @@ -152,7 +152,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides not basic checks in single threaded mode. 
assert(writers == 1 && "Writer lock not acquired before release!"); --writers; return true; diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index 366dd3cf55c6..b11467dcce28 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -177,6 +177,18 @@ StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64); } // namespace RISCV +namespace ARM { +struct ParsedBranchProtection { + StringRef Scope; + StringRef Key; + bool BranchTargetEnforcement; +}; + +bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, + StringRef &Err); + +} // namespace ARM + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index 4c41b88d6043..8d30e8e92755 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -36,9 +36,6 @@ namespace llvm { /// for some work to become available. class ThreadPool { public: - using TaskTy = std::function<void()>; - using PackagedTaskTy = std::packaged_task<void()>; - /// Construct a pool using the hardware strategy \p S for mapping hardware /// execution resources (threads, cores, CPUs) /// Defaults to using the maximum execution resources in the system, but @@ -51,17 +48,17 @@ public: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template <typename Function, typename... Args> - inline std::shared_future<void> async(Function &&F, Args &&... ArgList) { + inline auto async(Function &&F, Args &&...ArgList) { auto Task = std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...); - return asyncImpl(std::move(Task)); + return async(std::move(Task)); } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. - template <typename Function> - inline std::shared_future<void> async(Function &&F) { - return asyncImpl(std::forward<Function>(F)); + template <typename Func> + auto async(Func &&F) -> std::shared_future<decltype(F())> { + return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F))); } /// Blocking wait for all the threads to complete and the queue to be empty. @@ -74,17 +71,70 @@ public: bool isWorkerThread() const; private: + /// Helpers to create a promise and a callable wrapper of \p Task that sets + /// the result of the promise. Returns the callable and a future to access the + /// result. + template <typename ResTy> + static std::pair<std::function<void()>, std::future<ResTy>> + createTaskAndFuture(std::function<ResTy()> Task) { + std::shared_ptr<std::promise<ResTy>> Promise = + std::make_shared<std::promise<ResTy>>(); + auto F = Promise->get_future(); + return { + [Promise = std::move(Promise), Task]() { Promise->set_value(Task()); }, + std::move(F)}; + } + static std::pair<std::function<void()>, std::future<void>> + createTaskAndFuture(std::function<void()> Task) { + std::shared_ptr<std::promise<void>> Promise = + std::make_shared<std::promise<void>>(); + auto F = Promise->get_future(); + return {[Promise = std::move(Promise), Task]() { + Task(); + Promise->set_value(); + }, + std::move(F)}; + } + bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. 
- std::shared_future<void> asyncImpl(TaskTy F); + template <typename ResTy> + std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task) { + +#if LLVM_ENABLE_THREADS + /// Wrap the Task in a std::function<void()> that sets the result of the + /// corresponding future. + auto R = createTaskAndFuture(Task); + + { + // Lock the queue and push the new task + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Don't allow enqueueing after disabling the pool + assert(EnableFlag && "Queuing a thread during ThreadPool destruction"); + Tasks.push(std::move(R.first)); + } + QueueCondition.notify_one(); + return R.second.share(); + +#else // LLVM_ENABLE_THREADS Disabled + + // Get a Future with launch::deferred execution using std::async + auto Future = std::async(std::launch::deferred, std::move(Task)).share(); + // Wrap the future so that both ThreadPool::wait() can operate and the + // returned future can be sync'ed on. + Tasks.push([Future]() { Future.get(); }); + return Future; +#endif + } /// Threads in flight std::vector<llvm::thread> Threads; /// Tasks waiting for execution in the pool. - std::queue<PackagedTaskTy> Tasks; + std::queue<std::function<void()>> Tasks; /// Locking and signaling for accessing the Tasks queue. std::mutex QueueLock; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index e2d3dbdda88a..1d189c6dea6d 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -645,6 +645,13 @@ def extract_vec_elt_combines : GICombineGroup<[ extract_vec_elt_build_vec, extract_all_elts_from_build_vector]>; +def funnel_shift_from_or_shift : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) +>; + def funnel_shift_to_rotate : GICombineRule< (defs root:$root), (match (wip_match_opcode G_FSHL, G_FSHR):$root, @@ -683,7 +690,8 @@ def bitfield_extract_from_and : GICombineRule< [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; -def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>; +def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, + funnel_shift_to_rotate]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), @@ -751,6 +759,84 @@ def redundant_neg_operands: GICombineRule< [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +// Transform (fadd x, (fmul y, z)) -> (fma y, z, x) +// (fadd x, (fmul y, z)) -> (fmad y, z, x) +// Transform (fadd (fmul x, y), z) -> (fma x, y, z) +// (fadd (fmul x, y), z) -> (fmad x, y, z) +def combine_fadd_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) +// -> (fmad (fpext x), (fpext y), z) +// Transform (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) +// -> (fmad (fpext y), (fpext z), x) +def combine_fadd_fpext_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode 
G_FADD):$root, + [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fma x, y, (fmul z, u)), v) -> (fma x, y, (fma z, u, v)) +// (fadd (fmad x, y, (fmul z, u)), v) -> (fmad x, y, (fmad z, u, v)) +// Transform (fadd v, (fma x, y, (fmul z, u))) -> (fma x, y, (fma z, u, v)) +// (fadd v, (fmad x, y, (fmul z, u))) -> (fmad x, y, (fmad z, u, v)) +def combine_fadd_fma_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFMAFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fma x, y, (fpext (fmul u, v))), z) -> +// (fma x, y, (fma (fpext u), (fpext v), z)) +def combine_fadd_fpext_fma_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMAAggressive( + *${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fmul x, y), z) -> (fma x, y, -z) +// -> (fmad x, y, -z) +def combine_fsub_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) +// (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x) +def combine_fsub_fneg_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFNegFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fpext (fmul x, y)), z) -> +// (fma (fpext x), (fpext y), (fneg z)) +def combine_fsub_fpext_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFpExtFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fneg (fpext (fmul x, y))), z) -> +// (fneg (fma (fpext x), (fpext y), z)) +def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFpExtFNegFMulToFMadOrFMA( + *${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
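The GICombineRule definitions added above describe algebraic rewrites that fold a floating-point add or subtract of a multiply into a fused multiply-add, e.g. (fadd x, (fmul y, z)) -> (fma y, z, x) and (fsub (fmul x, y), z) -> (fma x, y, -z). A minimal scalar illustration of the arithmetic being matched is given below; it is not the GlobalISel implementation (which rewrites generic MIR and is gated on target support and fast-math constraints), it only shows the fused form that std::fma computes with a single rounding:

#include <cmath>
#include <cstdio>

// (fadd x, (fmul y, z)) -> (fma y, z, x): one rounding instead of two.
static double add_mul(double x, double y, double z) { return x + y * z; }
static double fused_add_mul(double x, double y, double z) {
  return std::fma(y, z, x);
}

// (fsub (fmul x, y), z) -> (fma x, y, -z)
static double sub_mul(double x, double y, double z) { return x * y - z; }
static double fused_sub_mul(double x, double y, double z) {
  return std::fma(x, y, -z);
}

int main() {
  double x = 1.0, y = 3.0, z = 7.0;
  std::printf("%g %g\n", add_mul(x, y, z), fused_add_mul(x, y, z));
  std::printf("%g %g\n", sub_mul(x, y, z), fused_sub_mul(x, y, z));
}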
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -783,6 +869,12 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one]>; +def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma, + combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma, + combine_fadd_fpext_fma_fmul_to_fmad_or_fma, combine_fsub_fmul_to_fmad_or_fma, + combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma, + combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>; + def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, combine_indexed_load_store, undef_combines, identity_combines, phi_combines, @@ -799,7 +891,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask ]>; + and_or_disjoint_mask, fma_combines]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index 6e45f8f6fb05..429fcbd81b45 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -24,22 +24,47 @@ using namespace sampleprof; namespace llvm { namespace sampleprof { +struct ProfiledCallGraphNode; + +struct ProfiledCallGraphEdge { + ProfiledCallGraphEdge(ProfiledCallGraphNode *Source, + ProfiledCallGraphNode *Target, uint64_t Weight) + : Source(Source), Target(Target), Weight(Weight) {} + ProfiledCallGraphNode *Source; + ProfiledCallGraphNode *Target; + uint64_t Weight; + + // The call destination is the only important data here, + // allow to transparently unwrap into it. + operator ProfiledCallGraphNode *() const { return Target; } +}; + struct ProfiledCallGraphNode { - ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {} - StringRef Name; - struct ProfiledCallGraphNodeComparer { - bool operator()(const ProfiledCallGraphNode *L, - const ProfiledCallGraphNode *R) const { - return L->Name < R->Name; + // Sort edges by callee names only since all edges to be compared are from + // same caller. Edge weights are not considered either because for the same + // callee only the edge with the largest weight is added to the edge set. + struct ProfiledCallGraphEdgeComparer { + bool operator()(const ProfiledCallGraphEdge &L, + const ProfiledCallGraphEdge &R) const { + return L.Target->Name < R.Target->Name; } }; - std::set<ProfiledCallGraphNode *, ProfiledCallGraphNodeComparer> Callees; + + using iterator = std::set<ProfiledCallGraphEdge>::iterator; + using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator; + using edge = ProfiledCallGraphEdge; + using edges = std::set<ProfiledCallGraphEdge, ProfiledCallGraphEdgeComparer>; + + ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {} + + StringRef Name; + edges Edges; }; class ProfiledCallGraph { public: - using iterator = std::set<ProfiledCallGraphNode *>::iterator; + using iterator = std::set<ProfiledCallGraphEdge>::iterator; // Constructor for non-CS profile. 
ProfiledCallGraph(SampleProfileMap &ProfileMap) { @@ -63,8 +88,9 @@ public: while (!Queue.empty()) { ContextTrieNode *Caller = Queue.front(); Queue.pop(); - // Add calls for context. When AddNodeWithSamplesOnly is true, both caller - // and callee need to have context profile. + FunctionSamples *CallerSamples = Caller->getFunctionSamples(); + + // Add calls for context. // Note that callsite target samples are completely ignored since they can // conflict with the context edges, which are formed by context // compression during profile generation, for cyclic SCCs. This may @@ -74,31 +100,61 @@ public: ContextTrieNode *Callee = &Child.second; addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); + + // Fetch edge weight from the profile. + uint64_t Weight; + FunctionSamples *CalleeSamples = Callee->getFunctionSamples(); + if (!CalleeSamples || !CallerSamples) { + Weight = 0; + } else { + uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CallsiteCount = 0; + LineLocation Callsite = Callee->getCallSiteLoc(); + if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { + SampleRecord::CallTargetMap &TargetCounts = CallTargets.get(); + auto It = TargetCounts.find(CalleeSamples->getName()); + if (It != TargetCounts.end()) + CallsiteCount = It->second; + } + Weight = std::max(CallsiteCount, CalleeEntryCount); + } + addProfiledCall(ContextTracker.getFuncNameFor(Caller), - ContextTracker.getFuncNameFor(Callee)); + ContextTracker.getFuncNameFor(Callee), Weight); } } } - iterator begin() { return Root.Callees.begin(); } - iterator end() { return Root.Callees.end(); } + iterator begin() { return Root.Edges.begin(); } + iterator end() { return Root.Edges.end(); } ProfiledCallGraphNode *getEntryNode() { return &Root; } void addProfiledFunction(StringRef Name) { if (!ProfiledFunctions.count(Name)) { // Link to synthetic root to make sure every node is reachable // from root. This does not affect SCC order. ProfiledFunctions[Name] = ProfiledCallGraphNode(Name); - Root.Callees.insert(&ProfiledFunctions[Name]); + Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0); } } - void addProfiledCall(StringRef CallerName, StringRef CalleeName) { +private: + void addProfiledCall(StringRef CallerName, StringRef CalleeName, + uint64_t Weight = 0) { assert(ProfiledFunctions.count(CallerName)); auto CalleeIt = ProfiledFunctions.find(CalleeName); - if (CalleeIt == ProfiledFunctions.end()) { + if (CalleeIt == ProfiledFunctions.end()) return; + ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName], + &CalleeIt->second, Weight); + auto &Edges = ProfiledFunctions[CallerName].Edges; + auto EdgeIt = Edges.find(Edge); + if (EdgeIt == Edges.end()) { + Edges.insert(Edge); + } else if (EdgeIt->Weight < Edge.Weight) { + // Replace existing call edges with same target but smaller weight. 
+ Edges.erase(EdgeIt); + Edges.insert(Edge); } - ProfiledFunctions[CallerName].Callees.insert(&CalleeIt->second); } void addProfiledCalls(const FunctionSamples &Samples) { @@ -107,20 +163,20 @@ public: for (const auto &Sample : Samples.getBodySamples()) { for (const auto &Target : Sample.second.getCallTargets()) { addProfiledFunction(Target.first()); - addProfiledCall(Samples.getFuncName(), Target.first()); + addProfiledCall(Samples.getFuncName(), Target.first(), Target.second); } } for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); - addProfiledCall(Samples.getFuncName(), InlinedSamples.first); + addProfiledCall(Samples.getFuncName(), InlinedSamples.first, + InlinedSamples.second.getEntrySamples()); addProfiledCalls(InlinedSamples.second); } } } -private: ProfiledCallGraphNode Root; StringMap<ProfiledCallGraphNode> ProfiledFunctions; }; @@ -128,12 +184,14 @@ private: } // end namespace sampleprof template <> struct GraphTraits<ProfiledCallGraphNode *> { + using NodeType = ProfiledCallGraphNode; using NodeRef = ProfiledCallGraphNode *; - using ChildIteratorType = std::set<ProfiledCallGraphNode *>::iterator; + using EdgeType = NodeType::edge; + using ChildIteratorType = NodeType::const_iterator; static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; } - static ChildIteratorType child_begin(NodeRef N) { return N->Callees.begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->Callees.end(); } + static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); } }; template <> diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index c13407a44091..6002f0270083 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -141,7 +141,7 @@ private: AsanDtorKind DestructorKind; }; -// Insert AddressSanitizer (address sanity checking) instrumentation +// Insert AddressSanitizer (address basic correctness checking) instrumentation FunctionPass *createAddressSanitizerFunctionPass( bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false, diff --git a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h index d76b55babc74..45983ad9d571 100644 --- a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h +++ b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h @@ -22,6 +22,7 @@ class Function; struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 72cb606eb51a..3c529abce85a 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -55,7 +55,6 @@ class MDNode; class MemorySSAUpdater; class PHINode; class StoreInst; -class SwitchInst; class TargetLibraryInfo; class TargetTransformInfo; @@ -238,10 +237,6 @@ CallInst *createCallMatchingInvoke(InvokeInst *II); /// This function converts the specified invoke into a normal call.
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr); -/// This function removes the default destination from the specified switch. -void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU = nullptr); - ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 22b2295cc9d7..c233e3dc168e 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -169,6 +169,10 @@ public: /// Called to update debug info associated with the instruction. virtual void updateDebugInfo(Instruction *I) const {} + + /// Return false if a sub-class wants to keep one of the loads/stores + /// after the SSA construction. + virtual bool shouldDelete(Instruction *I) const { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h new file mode 100644 index 000000000000..e1f681bbd367 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -0,0 +1,284 @@ +//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for the profile inference algorithm, profi. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H +#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { + +class BasicBlock; +class Function; +class MachineBasicBlock; +class MachineFunction; + +namespace afdo_detail { + +template <class BlockT> struct TypeMap {}; +template <> struct TypeMap<BasicBlock> { + using BasicBlockT = BasicBlock; + using FunctionT = Function; +}; +template <> struct TypeMap<MachineBasicBlock> { + using BasicBlockT = MachineBasicBlock; + using FunctionT = MachineFunction; +}; + +} // end namespace afdo_detail + +struct FlowJump; + +/// A wrapper of a binary basic block. +struct FlowBlock { + uint64_t Index; + uint64_t Weight{0}; + bool UnknownWeight{false}; + uint64_t Flow{0}; + bool HasSelfEdge{false}; + std::vector<FlowJump *> SuccJumps; + std::vector<FlowJump *> PredJumps; + + /// Check if it is the entry block in the function. + bool isEntry() const { return PredJumps.empty(); } + + /// Check if it is an exit block in the function. + bool isExit() const { return SuccJumps.empty(); } +}; + +/// A wrapper of a jump between two basic blocks. +struct FlowJump { + uint64_t Source; + uint64_t Target; + uint64_t Flow{0}; + bool IsUnlikely{false}; +}; + +/// A wrapper of binary function with basic blocks and jumps. +struct FlowFunction { + std::vector<FlowBlock> Blocks; + std::vector<FlowJump> Jumps; + /// The index of the entry block. + uint64_t Entry; +}; + +void applyFlowInference(FlowFunction &Func); + +/// Sample profile inference pass. 
+template <typename BT> class SampleProfileInference { +public: + using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT; + using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT; + using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>; + using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>; + using EdgeWeightMap = DenseMap<Edge, uint64_t>; + using BlockEdgeMap = + DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>; + + SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights) + : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {} + + /// Apply the profile inference algorithm for a given function + void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); + +private: + /// Try to infer branch probabilities mimicking implementation of + /// BranchProbabilityInfo. Unlikely taken branches are marked so that the + /// inference algorithm can avoid sending flow along corresponding edges. + void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func); + + /// Determine whether the block is an exit in the CFG. + bool isExit(const BasicBlockT *BB); + + /// Function. + const FunctionT &F; + + /// Successors for each basic block in the CFG. + BlockEdgeMap &Successors; + + /// Map basic blocks to their sampled weights. + BlockWeightMap &SampleBlockWeights; +}; + +template <typename BT> +void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights, + EdgeWeightMap &EdgeWeights) { + // Find all forwards reachable blocks which the inference algorithm will be + // applied on. + df_iterator_default_set<const BasicBlockT *> Reachable; + for (auto *BB : depth_first_ext(&F, Reachable)) + (void)BB /* Mark all reachable blocks */; + + // Find all backwards reachable blocks which the inference algorithm will be + // applied on. + df_iterator_default_set<const BasicBlockT *> InverseReachable; + for (const auto &BB : F) { + // An exit block is a block without any successors. 
+ if (isExit(&BB)) { + for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable)) + (void)RBB; + } + } + + // Keep a stable order for reachable blocks + DenseMap<const BasicBlockT *, uint64_t> BlockIndex; + std::vector<const BasicBlockT *> BasicBlocks; + BlockIndex.reserve(Reachable.size()); + BasicBlocks.reserve(Reachable.size()); + for (const auto &BB : F) { + if (Reachable.count(&BB) && InverseReachable.count(&BB)) { + BlockIndex[&BB] = BasicBlocks.size(); + BasicBlocks.push_back(&BB); + } + } + + BlockWeights.clear(); + EdgeWeights.clear(); + bool HasSamples = false; + for (const auto *BB : BasicBlocks) { + auto It = SampleBlockWeights.find(BB); + if (It != SampleBlockWeights.end() && It->second > 0) { + HasSamples = true; + BlockWeights[BB] = It->second; + } + } + // Quit early for functions with a single block or ones w/o samples + if (BasicBlocks.size() <= 1 || !HasSamples) { + return; + } + + // Create necessary objects + FlowFunction Func; + Func.Blocks.reserve(BasicBlocks.size()); + // Create FlowBlocks + for (const auto *BB : BasicBlocks) { + FlowBlock Block; + if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) { + Block.UnknownWeight = false; + Block.Weight = SampleBlockWeights[BB]; + } else { + Block.UnknownWeight = true; + Block.Weight = 0; + } + Block.Index = Func.Blocks.size(); + Func.Blocks.push_back(Block); + } + // Create FlowEdges + for (const auto *BB : BasicBlocks) { + for (auto *Succ : Successors[BB]) { + if (!BlockIndex.count(Succ)) + continue; + FlowJump Jump; + Jump.Source = BlockIndex[BB]; + Jump.Target = BlockIndex[Succ]; + Func.Jumps.push_back(Jump); + if (BB == Succ) { + Func.Blocks[BlockIndex[BB]].HasSelfEdge = true; + } + } + } + for (auto &Jump : Func.Jumps) { + Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump); + Func.Blocks[Jump.Target].PredJumps.push_back(&Jump); + } + + // Try to infer probabilities of jumps based on the content of basic block + findUnlikelyJumps(BasicBlocks, Successors, Func); + + // Find the entry block + for (size_t I = 0; I < Func.Blocks.size(); I++) { + if (Func.Blocks[I].isEntry()) { + Func.Entry = I; + break; + } + } + + // Create and apply the inference network model. + applyFlowInference(Func); + + // Extract the resulting weights from the control flow + // All weights are increased by one to avoid propagation errors introduced by + // zero weights. + for (const auto *BB : BasicBlocks) { + BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow; + } + for (auto &Jump : Func.Jumps) { + Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]); + EdgeWeights[E] = Jump.Flow; + } + +#ifndef NDEBUG + // Unreachable blocks and edges should not have a weight. 
+ for (auto &I : BlockWeights) { + assert(Reachable.contains(I.first)); + assert(InverseReachable.contains(I.first)); + } + for (auto &I : EdgeWeights) { + assert(Reachable.contains(I.first.first) && + Reachable.contains(I.first.second)); + assert(InverseReachable.contains(I.first.first) && + InverseReachable.contains(I.first.second)); + } +#endif +} + +template <typename BT> +inline void SampleProfileInference<BT>::findUnlikelyJumps( + const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func) {} + +template <> +inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps( + const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func) { + for (auto &Jump : Func.Jumps) { + const auto *BB = BasicBlocks[Jump.Source]; + const auto *Succ = BasicBlocks[Jump.Target]; + const Instruction *TI = BB->getTerminator(); + // Check if a block ends with InvokeInst and mark non-taken branch unlikely. + // In that case block Succ should be a landing pad + if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) { + if (isa<InvokeInst>(TI)) { + Jump.IsUnlikely = true; + } + } + const Instruction *SuccTI = Succ->getTerminator(); + // Check if the target block contains UnreachableInst and mark it unlikely + if (SuccTI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(SuccTI)) { + Jump.IsUnlikely = true; + } + } + } +} + +template <typename BT> +inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) { + return BB->succ_empty(); +} + +template <> +inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) { + return succ_empty(BB); +} + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 6a2f0acf46f3..175bdde7fd05 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -38,6 +38,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" namespace llvm { @@ -74,6 +75,8 @@ template <> struct IRTraits<BasicBlock> { } // end namespace afdo_detail +extern cl::opt<bool> SampleProfileUseProfi; + template <typename BT> class SampleProfileLoaderBaseImpl { public: SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) @@ -142,6 +145,9 @@ protected: ArrayRef<BasicBlockT *> Descendants, PostDominatorTreeT *DomTree); void propagateWeights(FunctionT &F); + void applyProfi(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights, + BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); @@ -150,6 +156,11 @@ protected: bool computeAndPropagateWeights(FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs); + void initWeightPropagation(FunctionT &F, + const DenseSet<GlobalValue::GUID> &InlinedGUIDs); + void + finalizeWeightPropagation(FunctionT &F, + const DenseSet<GlobalValue::GUID> &InlinedGUIDs); void emitCoverageRemarks(FunctionT &F); /// Map basic blocks to their computed weights. 
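When SampleProfileUseProfi is enabled, the loader below hands block-level sample counts to the flow-based inference (profi) declared above instead of running the iterative propagation passes. As a rough intuition only, this standalone sketch shows the flow-conservation constraint such an inference enforces on a small diamond CFG; the real applyFlowInference implements a more general flow-based algorithm and also has to reconcile noisy or mutually inconsistent counts, which this toy does not attempt.

#include <cstdint>
#include <iostream>

// Conservation on a diamond CFG (Entry -> {Left, Right} -> Exit): the flow
// leaving Entry must reappear across Left and Right, and again at Exit.
int main() {
  uint64_t EntryCount = 100; // sampled
  uint64_t LeftCount = 70;   // sampled
  // Right had no samples; conservation at Entry fixes its count.
  uint64_t RightCount = EntryCount - LeftCount;
  // Conservation at Exit: the inferred counts add back up to the entry flow.
  uint64_t ExitCount = LeftCount + RightCount;
  std::cout << "Right=" << RightCount << " Exit=" << ExitCount << "\n";
}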
@@ -741,50 +752,65 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) { /// known). template <typename BT> void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) { - bool Changed = true; - unsigned I = 0; - - // If BB weight is larger than its corresponding loop's header BB weight, - // use the BB weight to replace the loop header BB weight. - for (auto &BI : F) { - BasicBlockT *BB = &BI; - LoopT *L = LI->getLoopFor(BB); - if (!L) { - continue; + // Flow-based profile inference is only usable with BasicBlock instantiation + // of SampleProfileLoaderBaseImpl. + if (SampleProfileUseProfi) { + // Prepare block sample counts for inference. + BlockWeightMap SampleBlockWeights; + for (const auto &BI : F) { + ErrorOr<uint64_t> Weight = getBlockWeight(&BI); + if (Weight) + SampleBlockWeights[&BI] = Weight.get(); } - BasicBlockT *Header = L->getHeader(); - if (Header && BlockWeights[BB] > BlockWeights[Header]) { - BlockWeights[Header] = BlockWeights[BB]; + // Fill in BlockWeights and EdgeWeights using an inference algorithm. + applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights); + } else { + bool Changed = true; + unsigned I = 0; + + // If BB weight is larger than its corresponding loop's header BB weight, + // use the BB weight to replace the loop header BB weight. + for (auto &BI : F) { + BasicBlockT *BB = &BI; + LoopT *L = LI->getLoopFor(BB); + if (!L) { + continue; + } + BasicBlockT *Header = L->getHeader(); + if (Header && BlockWeights[BB] > BlockWeights[Header]) { + BlockWeights[Header] = BlockWeights[BB]; + } } - } - // Before propagation starts, build, for each block, a list of - // unique predecessors and successors. This is necessary to handle - // identical edges in multiway branches. Since we visit all blocks and all - // edges of the CFG, it is cleaner to build these lists once at the start - // of the pass. - buildEdges(F); + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } - // Propagate until we converge or we go past the iteration limit. - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); - } + // The first propagation propagates BB counts from annotated BBs to unknown + // BBs. The 2nd propagation pass resets edges weights, and use all BB + // weights to propagate edge weights. + VisitedEdges.clear(); + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } - // The first propagation propagates BB counts from annotated BBs to unknown - // BBs. The 2nd propagation pass resets edges weights, and use all BB weights - // to propagate edge weights. - VisitedEdges.clear(); - Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); + // The 3rd propagation pass allows adjust annotated BB weights that are + // obviously wrong. + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, true); + } } +} - // The 3rd propagation pass allows adjust annotated BB weights that are - // obviously wrong. 
- Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, true); - } +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::applyProfi( + FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, + BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) { + auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights); + Infer.apply(BlockWeights, EdgeWeights); } /// Generate branch weight metadata for all branches in \p F. @@ -842,26 +868,64 @@ bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights( Changed |= computeBlockWeights(F); if (Changed) { - // Add an entry count to the function using the samples gathered at the - // function entry. - // Sets the GUIDs that are inlined in the profiled binary. This is used - // for ThinLink to make correct liveness analysis, and also make the IR - // match the profiled binary before annotation. - getFunction(F).setEntryCount( - ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), - &InlinedGUIDs); + // Initialize propagation. + initWeightPropagation(F, InlinedGUIDs); + // Propagate weights to all edges. + propagateWeights(F); + + // Post-process propagated weights. + finalizeWeightPropagation(F, InlinedGUIDs); + } + + return Changed; +} + +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation( + FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + // Add an entry count to the function using the samples gathered at the + // function entry. + // Sets the GUIDs that are inlined in the profiled binary. This is used + // for ThinLink to make correct liveness analysis, and also make the IR + // match the profiled binary before annotation. + getFunction(F).setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); + + if (!SampleProfileUseProfi) { // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); // Find equivalence classes. findEquivalenceClasses(F); - - // Propagate weights to all edges. - propagateWeights(F); } - return Changed; + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); +} + +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation( + FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + // If we utilize a flow-based count inference, then we trust the computed + // counts and set the entry count as computed by the algorithm. This is + // primarily done to sync the counts produced by profi and BFI inference, + // which uses the entry count for mass propagation. + // If profi produces a zero-value for the entry count, we fallback to + // Samples->getHeadSamples() + 1 to avoid functions with zero count. 
+ if (SampleProfileUseProfi) { + const BasicBlockT *EntryBB = getEntryBB(&F); + if (BlockWeights[EntryBB] > 0) { + getFunction(F).setEntryCount( + ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), + &InlinedGUIDs); + } + } } template <typename BT> diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 33fdc8b628c5..856d7e90acb2 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -104,12 +104,67 @@ static const uint32_t LBH_NONTAKEN_WEIGHT = 4; /// All reachable probability will proportionally share the remaining part. static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); +/// Heuristics and lookup tables for non-loop branches: +/// Pointer Heuristics (PH) static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; +static const BranchProbability + PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); +static const BranchProbability + PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + +using ProbabilityList = SmallVector<BranchProbability>; +using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>; + +/// Pointer comparisons: +static const ProbabilityTable PointerTable{ + {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely + {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely +}; +/// Zero Heuristics (ZH) static const uint32_t ZH_TAKEN_WEIGHT = 20; static const uint32_t ZH_NONTAKEN_WEIGHT = 12; +static const BranchProbability + ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); +static const BranchProbability + ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); + +/// Integer compares with 0: +static const ProbabilityTable ICmpWithZeroTable{ + {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely + {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely + {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely + {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely +}; + +/// Integer compares with -1: +static const ProbabilityTable ICmpWithMinusOneTable{ + {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely + {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely + // InstCombine canonicalizes X >= 0 into X > -1 + {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely +}; + +/// Integer compares with 1: +static const ProbabilityTable ICmpWithOneTable{ + // InstCombine canonicalizes X <= 0 into X < 1 + {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely +}; + +/// strcmp and similar functions return zero, negative, or positive, if the +/// first string is equal, less, or greater than the second. We consider it +/// likely that the strings are not equal, so a comparison with zero is +/// probably false, but also a comparison with any other number is also +/// probably false given that what exactly is returned for nonzero values is +/// not specified. Any kind of comparison other than equality we know +/// nothing about. 
+static const ProbabilityTable ICmpWithLibCallTable{ + {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, + {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, +}; +// Floating-Point Heuristics (FPH) static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; @@ -120,6 +175,21 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; /// exceptional case, so the result is unlikely. static const uint32_t FPH_UNO_WEIGHT = 1; +static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, + FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); +static const BranchProbability + FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); +static const BranchProbability + FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); +static const BranchProbability + FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + +/// Floating-Point compares: +static const ProbabilityTable FCmpTable{ + {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely + {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely +}; + /// Set of dedicated "absolute" execution weights for a block. These weights are /// meaningful relative to each other and their derivatives only. enum class BlockExecWeight : std::uint32_t { @@ -468,21 +538,10 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { assert(CI->getOperand(1)->getType()->isPointerTy()); - BranchProbability TakenProb(PH_TAKEN_WEIGHT, - PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); - BranchProbability UntakenProb(PH_NONTAKEN_WEIGHT, - PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); - - // p != 0 -> isProb = true - // p == 0 -> isProb = false - // p != q -> isProb = true - // p == q -> isProb = false; - bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; - if (!isProb) - std::swap(TakenProb, UntakenProb); - - setEdgeProbability( - BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); + auto Search = PointerTable.find(CI->getPredicate()); + if (Search == PointerTable.end()) + return false; + setEdgeProbability(BB, Search->second); return true; } @@ -949,86 +1008,33 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, if (Function *CalledFn = Call->getCalledFunction()) TLI->getLibFunc(*CalledFn, Func); - bool isProb; + ProbabilityTable::const_iterator Search; if (Func == LibFunc_strcasecmp || Func == LibFunc_strcmp || Func == LibFunc_strncasecmp || Func == LibFunc_strncmp || Func == LibFunc_memcmp || Func == LibFunc_bcmp) { - // strcmp and similar functions return zero, negative, or positive, if the - // first string is equal, less, or greater than the second. We consider it - // likely that the strings are not equal, so a comparison with zero is - // probably false, but also a comparison with any other number is also - // probably false given that what exactly is returned for nonzero values is - // not specified. Any kind of comparison other than equality we know - // nothing about. 
- switch (CI->getPredicate()) { - case CmpInst::ICMP_EQ: - isProb = false; - break; - case CmpInst::ICMP_NE: - isProb = true; - break; - default: + Search = ICmpWithLibCallTable.find(CI->getPredicate()); + if (Search == ICmpWithLibCallTable.end()) return false; - } } else if (CV->isZero()) { - switch (CI->getPredicate()) { - case CmpInst::ICMP_EQ: - // X == 0 -> Unlikely - isProb = false; - break; - case CmpInst::ICMP_NE: - // X != 0 -> Likely - isProb = true; - break; - case CmpInst::ICMP_SLT: - // X < 0 -> Unlikely - isProb = false; - break; - case CmpInst::ICMP_SGT: - // X > 0 -> Likely - isProb = true; - break; - default: + Search = ICmpWithZeroTable.find(CI->getPredicate()); + if (Search == ICmpWithZeroTable.end()) + return false; + } else if (CV->isOne()) { + Search = ICmpWithOneTable.find(CI->getPredicate()); + if (Search == ICmpWithOneTable.end()) return false; - } - } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { - // InstCombine canonicalizes X <= 0 into X < 1. - // X <= 0 -> Unlikely - isProb = false; } else if (CV->isMinusOne()) { - switch (CI->getPredicate()) { - case CmpInst::ICMP_EQ: - // X == -1 -> Unlikely - isProb = false; - break; - case CmpInst::ICMP_NE: - // X != -1 -> Likely - isProb = true; - break; - case CmpInst::ICMP_SGT: - // InstCombine canonicalizes X >= 0 into X > -1. - // X >= 0 -> Likely - isProb = true; - break; - default: + Search = ICmpWithMinusOneTable.find(CI->getPredicate()); + if (Search == ICmpWithMinusOneTable.end()) return false; - } } else { return false; } - BranchProbability TakenProb(ZH_TAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); - BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT, - ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); - if (!isProb) - std::swap(TakenProb, UntakenProb); - - setEdgeProbability( - BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); + setEdgeProbability(BB, Search->second); return true; } @@ -1042,34 +1048,21 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { if (!FCmp) return false; - uint32_t TakenWeight = FPH_TAKEN_WEIGHT; - uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT; - bool isProb; + ProbabilityList ProbList; if (FCmp->isEquality()) { - // f1 == f2 -> Unlikely - // f1 != f2 -> Likely - isProb = !FCmp->isTrueWhenEqual(); - } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { - // !isnan -> Likely - isProb = true; - TakenWeight = FPH_ORD_WEIGHT; - NontakenWeight = FPH_UNO_WEIGHT; - } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { - // isnan -> Unlikely - isProb = false; - TakenWeight = FPH_ORD_WEIGHT; - NontakenWeight = FPH_UNO_WEIGHT; + ProbList = !FCmp->isTrueWhenEqual() ? 
+ // f1 == f2 -> Unlikely + ProbabilityList({FPTakenProb, FPUntakenProb}) : + // f1 != f2 -> Likely + ProbabilityList({FPUntakenProb, FPTakenProb}); } else { - return false; + auto Search = FCmpTable.find(FCmp->getPredicate()); + if (Search == FCmpTable.end()) + return false; + ProbList = Search->second; } - BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight); - BranchProbability UntakenProb(NontakenWeight, TakenWeight + NontakenWeight); - if (!isProb) - std::swap(TakenProb, UntakenProb); - - setEdgeProbability( - BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb})); + setEdgeProbability(BB, ProbList); return true; } diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp index 3634526370f5..7426d0c07592 100644 --- a/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -24,12 +24,12 @@ // divergent can help the compiler to selectively run these optimizations. // // This implementation is derived from the Vectorization Analysis of the -// Region Vectorizer (RV). That implementation in turn is based on the approach -// described in +// Region Vectorizer (RV). The analysis is based on the approach described in // -// Improving Performance of OpenCL on CPUs -// Ralf Karrenberg and Sebastian Hack -// CC '12 +// An abstract interpretation for SPMD divergence +// on reducible control flow graphs. +// Julian Rosemann, Simon Moll and Sebastian Hack +// POPL '21 // // This implementation is generic in the sense that it does // not itself identify original sources of divergence. diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index f22c6aa04f5e..2ec6cbeabda2 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -820,7 +820,7 @@ void IRSimilarityIdentifier::populateMapper( /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from /// the IRInstructionData in subsequence. /// -/// \param [in] Mapper - The instruction mapper for sanity checks. +/// \param [in] Mapper - The instruction mapper for basic correctness checks. /// \param [in] InstrList - The vector that holds the instruction data. /// \param [in] IntegerMapping - The vector that holds the mapped integers. /// \param [out] CandsForRepSubstring - The vector to store the generated diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index c4b7239b43ab..cfe910df4e91 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -81,6 +81,7 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { case RecurKind::Mul: case RecurKind::FAdd: case RecurKind::FMul: + case RecurKind::FMulAdd: return true; } return false; @@ -194,21 +195,28 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, // vectorizing floating point operations without unsafe math. static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi) { - // Currently only FAdd is supported - if (Kind != RecurKind::FAdd) + // Currently only FAdd and FMulAdd are supported. 
+ if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd) return false; - // Ensure the exit instruction is an FAdd, and that it only has one user - // other than the reduction PHI - if (Exit->getOpcode() != Instruction::FAdd || Exit->hasNUsesOrMore(3) || - Exit != ExactFPMathInst) + if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd) + return false; + + if (Kind == RecurKind::FMulAdd && + !RecurrenceDescriptor::isFMulAddIntrinsic(Exit)) + return false; + + // Ensure the exit instruction has only one user other than the reduction PHI + if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3)) return false; // The only pattern accepted is the one in which the reduction PHI // is used as one of the operands of the exit instruction - auto *LHS = Exit->getOperand(0); - auto *RHS = Exit->getOperand(1); - if (LHS != Phi && RHS != Phi) + auto *Op0 = Exit->getOperand(0); + auto *Op1 = Exit->getOperand(1); + if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi) + return false; + if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi) return false; LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi @@ -389,6 +397,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind, for (User *U : Cur->users()) { Instruction *UI = cast<Instruction>(U); + // If the user is a call to llvm.fmuladd then the instruction can only be + // the final operand. + if (isFMulAddIntrinsic(UI)) + if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1)) + return false; + // Check if we found the exit user. BasicBlock *Parent = UI->getParent(); if (!TheLoop->contains(Parent)) { @@ -710,6 +724,9 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, I->hasNoSignedZeros())) && isFPMinMaxRecurrenceKind(Kind))) return isMinMaxPattern(I, Kind, Prev); + else if (isFMulAddIntrinsic(I)) + return InstDesc(Kind == RecurKind::FMulAdd, I, + I->hasAllowReassoc() ? nullptr : I); return InstDesc(false, I); } } @@ -804,6 +821,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, << " PHI." << *Phi << "\n"); return true; } + if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, + DT)) { + LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n"); + return true; + } // Not a reduction of known type. return false; } @@ -927,6 +949,7 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp, case RecurKind::FMul: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); + case RecurKind::FMulAdd: case RecurKind::FAdd: // Adding zero to a number does not change it. // FIXME: Ideally we should not need to check FMF for FAdd and should always @@ -974,6 +997,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { return Instruction::Xor; case RecurKind::FMul: return Instruction::FMul; + case RecurKind::FMulAdd: case RecurKind::FAdd: return Instruction::FAdd; case RecurKind::SMax: @@ -1032,6 +1056,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { return SelectPatternResult::isMinOrMax( matchSelectPattern(Cur, LHS, RHS).Flavor); } + // Recognize a call to the llvm.fmuladd intrinsic. 
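The RecurKind::FMulAdd handling above and below recognizes reductions whose update is an llvm.fmuladd call with the running value feeding only the last (addend) operand. A standalone source-level sketch of that reduction shape follows; whether a front end actually forms llvm.fmuladd for it depends on the floating-point contraction settings, so treat that mapping as an assumption.

#include <cstddef>
#include <iostream>

// The shape being recognized: each iteration computes a[i] * b[i] + acc, so
// the running value (the reduction PHI) appears only as the addend.
double dotMulAdd(const double *A, const double *B, std::size_t N) {
  double Acc = 0.0;                  // corresponds to the reduction PHI
  for (std::size_t I = 0; I < N; ++I)
    Acc = A[I] * B[I] + Acc;         // Acc never feeds a multiplicand
  return Acc;
}

int main() {
  double A[] = {1.0, 2.0, 3.0}, B[] = {4.0, 5.0, 6.0};
  std::cout << dotMulAdd(A, B, 3) << "\n"; // 32
}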
+ if (isFMulAddIntrinsic(Cur)) + return true; + return Cur->getOpcode() == RedOp; }; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 864eeea4f8bf..22d2ce11cc90 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2180,6 +2180,55 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); } +static Value *simplifyOrLogic(Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Expected same type for 'or' ops"); + Type *Ty = X->getType(); + + // X | ~X --> -1 + if (match(Y, m_Not(m_Specific(X)))) + return ConstantInt::getAllOnesValue(Ty); + + // X | ~(X & ?) = -1 + if (match(Y, m_Not(m_c_And(m_Specific(X), m_Value())))) + return ConstantInt::getAllOnesValue(Ty); + + // X | (X & ?) --> X + if (match(Y, m_c_And(m_Specific(X), m_Value()))) + return X; + + Value *A, *B; + + // (A & ~B) | (A ^ B) --> A ^ B + // (~B & A) | (A ^ B) --> A ^ B + // (A & ~B) | (B ^ A) --> B ^ A + // (~B & A) | (B ^ A) --> B ^ A + if (match(X, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Y, m_c_Xor(m_Specific(A), m_Specific(B)))) + return Y; + + // (~A ^ B) | (A & B) --> ~A ^ B + // (B ^ ~A) | (A & B) --> B ^ ~A + // (~A ^ B) | (B & A) --> ~A ^ B + // (B ^ ~A) | (B & A) --> B ^ ~A + if (match(X, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && + match(Y, m_c_And(m_Specific(A), m_Specific(B)))) + return X; + + // (A ^ B) | (A | B) --> A | B + // (A ^ B) | (B | A) --> B | A + if (match(X, m_Xor(m_Value(A), m_Value(B))) && + match(Y, m_c_Or(m_Specific(A), m_Specific(B)))) + return Y; + + // ~(A ^ B) | (A | B) --> -1 + // ~(A ^ B) | (B | A) --> -1 + if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) && + match(Y, m_c_Or(m_Specific(A), m_Specific(B)))) + return ConstantInt::getAllOnesValue(Ty); + + return nullptr; +} + /// Given operands for an Or, see if we can fold the result. /// If not, this returns null. static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -2202,81 +2251,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (Op0 == Op1 || match(Op1, m_Zero())) return Op0; - // A | ~A = ~A | A = -1 - if (match(Op0, m_Not(m_Specific(Op1))) || - match(Op1, m_Not(m_Specific(Op0)))) - return Constant::getAllOnesValue(Op0->getType()); - - // (A & ?) | A = A - if (match(Op0, m_c_And(m_Specific(Op1), m_Value()))) - return Op1; - - // A | (A & ?) = A - if (match(Op1, m_c_And(m_Specific(Op0), m_Value()))) - return Op0; - - // ~(A & ?) | A = -1 - if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value())))) - return Constant::getAllOnesValue(Op1->getType()); - - // A | ~(A & ?) = -1 - if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value())))) - return Constant::getAllOnesValue(Op0->getType()); + if (Value *R = simplifyOrLogic(Op0, Op1)) + return R; + if (Value *R = simplifyOrLogic(Op1, Op0)) + return R; if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or)) return V; Value *A, *B, *NotA; - // (A & ~B) | (A ^ B) -> (A ^ B) - // (~B & A) | (A ^ B) -> (A ^ B) - // (A & ~B) | (B ^ A) -> (B ^ A) - // (~B & A) | (B ^ A) -> (B ^ A) - if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && - (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || - match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) - return Op1; - - // Commute the 'or' operands. 
- // (A ^ B) | (A & ~B) -> (A ^ B) - // (A ^ B) | (~B & A) -> (A ^ B) - // (B ^ A) | (A & ~B) -> (B ^ A) - // (B ^ A) | (~B & A) -> (B ^ A) - if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && - (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || - match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) - return Op0; - - // (A & B) | (~A ^ B) -> (~A ^ B) - // (B & A) | (~A ^ B) -> (~A ^ B) - // (A & B) | (B ^ ~A) -> (B ^ ~A) - // (B & A) | (B ^ ~A) -> (B ^ ~A) - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || - match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) - return Op1; - - // Commute the 'or' operands. - // (~A ^ B) | (A & B) -> (~A ^ B) - // (~A ^ B) | (B & A) -> (~A ^ B) - // (B ^ ~A) | (A & B) -> (B ^ ~A) - // (B ^ ~A) | (B & A) -> (B ^ ~A) - if (match(Op1, m_And(m_Value(A), m_Value(B))) && - (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || - match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) - return Op0; - - // (A | B) | (A ^ B) --> A | B - // (B | A) | (A ^ B) --> B | A - if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && - match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) - return Op0; - - // Commute the outer 'or' operands. - // (A ^ B) | (A | B) --> A | B - // (A ^ B) | (B | A) --> B | A - if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && - match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) - return Op1; // (~A & B) | ~(A | B) --> ~A // (~A & B) | ~(B | A) --> ~A @@ -2414,6 +2397,30 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Op0->getType()); + auto foldAndOrNot = [](Value *X, Value *Y) -> Value * { + Value *A, *B; + // (~A & B) ^ (A | B) --> A -- There are 8 commuted variants. + if (match(X, m_c_And(m_Not(m_Value(A)), m_Value(B))) && + match(Y, m_c_Or(m_Specific(A), m_Specific(B)))) + return A; + + // (~A | B) ^ (A & B) --> ~A -- There are 8 commuted variants. + // The 'not' op must contain a complete -1 operand (no undef elements for + // vector) for the transform to be safe. + Value *NotA; + if (match(X, + m_c_Or(m_CombineAnd(m_NotForbidUndef(m_Value(A)), m_Value(NotA)), + m_Value(B))) && + match(Y, m_c_And(m_Specific(A), m_Specific(B)))) + return NotA; + + return nullptr; + }; + if (Value *R = foldAndOrNot(Op0, Op1)) + return R; + if (Value *R = foldAndOrNot(Op1, Op0)) + return R; + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor)) return V; @@ -2935,8 +2942,10 @@ static Value *simplifyICmpWithBinOpOnLHS( return getFalse(ITy); } - // x >> y <=u x - // x udiv y <=u x. + // x >>u y <=u x --> true. + // x >>u y >u x --> false. + // x udiv y <=u x --> true. + // x udiv y >u x --> false. if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { // icmp pred (X op Y), X @@ -2946,6 +2955,37 @@ static Value *simplifyICmpWithBinOpOnLHS( return getTrue(ITy); } + // If x is nonzero: + // x >>u C <u x --> true for C != 0. + // x >>u C != x --> true for C != 0. + // x >>u C >=u x --> false for C != 0. + // x >>u C == x --> false for C != 0. + // x udiv C <u x --> true for C != 1. + // x udiv C != x --> true for C != 1. + // x udiv C >=u x --> false for C != 1. + // x udiv C == x --> false for C != 1. 
+ // TODO: allow non-constant shift amount/divisor + const APInt *C; + if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) || + (match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) { + if (isKnownNonZero(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) { + switch (Pred) { + default: + break; + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGE: + return getFalse(ITy); + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + return getTrue(ITy); + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_ULE: + // UGT/ULE are handled by the more general case just above + llvm_unreachable("Unexpected UGT/ULE, should have been handled"); + } + } + } + // (x*C1)/C2 <= x for C1 <= C2. // This holds even if the multiplication overflows: Assume that x != 0 and // arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and diff --git a/llvm/lib/Analysis/IntervalPartition.cpp b/llvm/lib/Analysis/IntervalPartition.cpp index 23ff4fd6f85e..d9620fd405bc 100644 --- a/llvm/lib/Analysis/IntervalPartition.cpp +++ b/llvm/lib/Analysis/IntervalPartition.cpp @@ -36,16 +36,16 @@ INITIALIZE_PASS(IntervalPartition, "intervals", // releaseMemory - Reset state back to before function was analyzed void IntervalPartition::releaseMemory() { - for (unsigned i = 0, e = Intervals.size(); i != e; ++i) - delete Intervals[i]; + for (Interval *I : Intervals) + delete I; IntervalMap.clear(); Intervals.clear(); RootInterval = nullptr; } void IntervalPartition::print(raw_ostream &O, const Module*) const { - for(unsigned i = 0, e = Intervals.size(); i != e; ++i) - Intervals[i]->print(O); + for (const Interval *I : Intervals) + I->print(O); } // addIntervalToPartition - Add an interval to the internal list of intervals, @@ -87,8 +87,8 @@ bool IntervalPartition::runOnFunction(Function &F) { // Now that we know all of the successor information, propagate this to the // predecessors for each block. - for (unsigned i = 0, e = Intervals.size(); i != e; ++i) - updatePredecessors(Intervals[i]); + for (Interval *I : Intervals) + updatePredecessors(I); return false; } @@ -113,6 +113,6 @@ IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) // Now that we know all of the successor information, propagate this to the // predecessors for each block. - for (unsigned i = 0, e = Intervals.size(); i != e; ++i) - updatePredecessors(Intervals[i]); + for (Interval *I : Intervals) + updatePredecessors(I); } diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 50fa169c2081..5b5d48bf6fe5 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -1095,7 +1095,8 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, if (!Ty->isIntegerTy()) return ValueLatticeElement::getOverdefined(); - APInt Offset(Ty->getScalarSizeInBits(), 0); + unsigned BitWidth = Ty->getScalarSizeInBits(); + APInt Offset(BitWidth, 0); if (matchICmpOperand(Offset, LHS, Val, EdgePred)) return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset); @@ -1118,13 +1119,23 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI, // If (Val & Mask) != 0 then the value must be larger than the lowest set // bit of Mask. if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) { - unsigned BitWidth = Ty->getIntegerBitWidth(); return ValueLatticeElement::getRange(ConstantRange::getNonEmpty( APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()), APInt::getZero(BitWidth))); } } + // If (X urem Modulus) >= C, then X >= C. 
+ // TODO: An upper bound could be computed as well. + if (match(LHS, m_URem(m_Specific(Val), m_Value())) && + match(RHS, m_APInt(C))) { + // Use the icmp region so we don't have to deal with different predicates. + ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C); + if (!CR.isEmptySet()) + return ValueLatticeElement::getRange(ConstantRange::getNonEmpty( + CR.getUnsignedMin(), APInt(BitWidth, 0))); + } + return ValueLatticeElement::getOverdefined(); } diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index f9bd7167317f..19a24ac6a484 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -666,6 +666,29 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, return false; } +static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, + function_ref<void(Value *)> AddPointer) { + SmallPtrSet<Value *, 8> Visited; + SmallVector<Value *> WorkList; + WorkList.push_back(StartPtr); + + while (!WorkList.empty()) { + Value *Ptr = WorkList.pop_back_val(); + if (!Visited.insert(Ptr).second) + continue; + auto *PN = dyn_cast<PHINode>(Ptr); + // SCEV does not look through non-header PHIs inside the loop. Such phis + // can be analyzed by adding separate accesses for each incoming pointer + // value. + if (PN && InnermostLoop.contains(PN->getParent()) && + PN->getParent() != InnermostLoop.getHeader()) { + for (const Use &Inc : PN->incoming_values()) + WorkList.push_back(Inc); + } else + AddPointer(Ptr); + } +} + bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, MemAccessInfo Access, const ValueToValueMap &StridesMap, @@ -1032,13 +1055,11 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, bool ShouldCheckWrap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - unsigned AddrSpace = Ty->getPointerAddressSpace(); + assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type"); - // Make sure we're not accessing an aggregate type. - // TODO: Why? This doesn't make any sense. - if (AccessTy->isAggregateType()) { - LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" - << *Ptr << "\n"); + if (isa<ScalableVectorType>(AccessTy)) { + LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy + << "\n"); return 0; } @@ -1068,6 +1089,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, // An getelementptr without an inbounds attribute and unit stride would have // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. + unsigned AddrSpace = Ty->getPointerAddressSpace(); bool IsInBoundsGEP = isInBoundsGep(Ptr); bool IsNoWrapAddRec = !ShouldCheckWrap || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) || @@ -1101,7 +1123,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, } auto &DL = Lp->getHeader()->getModule()->getDataLayout(); - int64_t Size = DL.getTypeAllocSize(AccessTy); + TypeSize AllocSize = DL.getTypeAllocSize(AccessTy); + int64_t Size = AllocSize.getFixedSize(); const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. 
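The visitPointers helper added above walks a worklist with a visited set, looking through non-header PHIs inside the loop so each incoming pointer is analyzed as a separate access. The standalone sketch below shows the same worklist pattern with hypothetical Toy* types; unlike the real helper it looks through every phi-like node rather than only non-header PHIs of the innermost loop.

#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// A toy "value": either a plain pointer (no operands) or a phi-like node that
// merges several incoming pointers. All names here are hypothetical.
struct ToyValue {
  std::string Name;
  std::vector<const ToyValue *> Incoming; // non-empty => behaves like a PHI
};

// Worklist traversal with a visited set: phi-like nodes are looked through,
// every other value is reported to the callback.
void visitToyPointers(const ToyValue *Start,
                      const std::function<void(const ToyValue *)> &AddPointer) {
  std::unordered_set<const ToyValue *> Visited;
  std::vector<const ToyValue *> WorkList{Start};
  while (!WorkList.empty()) {
    const ToyValue *V = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(V).second)
      continue;                       // already handled; avoids PHI cycles
    if (!V->Incoming.empty()) {
      for (const ToyValue *In : V->Incoming)
        WorkList.push_back(In);       // look through the phi
    } else {
      AddPointer(V);                  // a leaf pointer to analyze
    }
  }
}

int main() {
  ToyValue A{"a", {}}, B{"b", {}};
  ToyValue Phi{"phi", {&A, &B}};
  visitToyPointers(&Phi,
                   [](const ToyValue *V) { std::cout << V->Name << "\n"; });
}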
@@ -1263,29 +1286,6 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, return Diff && *Diff == 1; } -static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, - function_ref<void(Value *)> AddPointer) { - SmallPtrSet<Value *, 8> Visited; - SmallVector<Value *> WorkList; - WorkList.push_back(StartPtr); - - while (!WorkList.empty()) { - Value *Ptr = WorkList.pop_back_val(); - if (!Visited.insert(Ptr).second) - continue; - auto *PN = dyn_cast<PHINode>(Ptr); - // SCEV does not look through non-header PHIs inside the loop. Such phis - // can be analyzed by adding separate accesses for each incoming pointer - // value. - if (PN && InnermostLoop.contains(PN->getParent()) && - PN->getParent() != InnermostLoop.getHeader()) { - for (const Use &Inc : PN->incoming_values()) - WorkList.push_back(Inc); - } else - AddPointer(Ptr); - } -} - void MemoryDepChecker::addAccess(StoreInst *SI) { visitPointers(SI->getPointerOperand(), *InnermostLoop, [this, SI](Value *Ptr) { diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index b44d15e71556..da6bb4c49cba 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -1481,11 +1481,11 @@ void MemoryDependenceResults::removeCachedNonLocalPointerDependencies( // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { - Instruction *Target = PInfo[i].getResult().getInst(); + for (const NonLocalDepEntry &DE : PInfo) { + Instruction *Target = DE.getResult().getInst(); if (!Target) continue; // Ignore non-local dep results. - assert(Target->getParent() == PInfo[i].getBB()); + assert(Target->getParent() == DE.getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 7f2d04c49565..854ba83bd34a 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -213,6 +213,28 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, LibFunc F; if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) { switch (F) { + case LibFunc_memset_chk: { + assert(ArgIdx == 0 && "Invalid argument index for memset_chk"); + LocationSize Size = LocationSize::afterPointer(); + if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) { + // memset_chk writes at most Len bytes. It may write less, if Len + // exceeds the specified max size and aborts. + Size = LocationSize::upperBound(Len->getZExtValue()); + } + return MemoryLocation(Arg, Size, AATags); + } + case LibFunc_strncpy: { + assert((ArgIdx == 0 || ArgIdx == 1) && + "Invalid argument index for strncpy"); + LocationSize Size = LocationSize::afterPointer(); + if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) { + // strncpy is guaranteed to write Len bytes, but only reads up to Len + // bytes. + Size = ArgIdx == 0 ? 
LocationSize::precise(Len->getZExtValue()) + : LocationSize::upperBound(Len->getZExtValue()); + } + return MemoryLocation(Arg, Size, AATags); + } case LibFunc_memset_pattern16: assert((ArgIdx == 0 || ArgIdx == 1) && "Invalid argument index for memset_pattern16"); diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp index c73e1fd82915..4c80f6743411 100644 --- a/llvm/lib/Analysis/PHITransAddr.cpp +++ b/llvm/lib/Analysis/PHITransAddr.cpp @@ -69,7 +69,7 @@ static bool VerifySubExpr(Value *Expr, } // If it isn't in the InstInputs list it is a subexpr incorporated into the - // address. Sanity check that it is phi translatable. + // address. Validate that it is phi translatable. if (!CanPHITrans(I)) { errs() << "Instruction in PHITransAddr is not phi-translatable:\n"; errs() << *I << '\n'; diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp index a73607dbef61..c20ecff5f912 100644 --- a/llvm/lib/Analysis/RegionPass.cpp +++ b/llvm/lib/Analysis/RegionPass.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" +#include "llvm/IR/PrintPasses.h" #include "llvm/IR/StructuralHash.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" @@ -187,6 +188,8 @@ public: } bool runOnRegion(Region *R, RGPassManager &RGM) override { + if (!isFunctionInPrintList(R->getEntry()->getParent()->getName())) + return false; Out << Banner; for (const auto *BB : R->blocks()) { if (BB) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f7c22cfb0310..7dc7f9904c70 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2915,8 +2915,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, const Loop *L, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scAddRecExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); + for (const SCEV *Op : Ops) + ID.AddPointer(Op); ID.AddPointer(L); void *IP = nullptr; SCEVAddRecExpr *S = @@ -2939,8 +2939,8 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, SCEV::NoWrapFlags Flags) { FoldingSetNodeID ID; ID.AddInteger(scMulExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); + for (const SCEV *Op : Ops) + ID.AddPointer(Op); void *IP = nullptr; SCEVMulExpr *S = static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); @@ -3708,8 +3708,8 @@ SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops) { FoldingSetNodeID ID; ID.AddInteger(SCEVType); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); + for (const SCEV *Op : Ops) + ID.AddPointer(Op); void *IP = nullptr; return UniqueSCEVs.FindNodeOrInsertPos(ID, IP); } @@ -4094,6 +4094,17 @@ void ScalarEvolution::eraseValueFromMap(Value *V) { } } +void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) { + // A recursive query may have already computed the SCEV. It should be + // equivalent, but may not necessarily be exactly the same, e.g. due to lazily + // inferred nowrap flags. + auto It = ValueExprMap.find_as(V); + if (It == ValueExprMap.end()) { + ValueExprMap.insert({SCEVCallbackVH(V, this), S}); + ExprValueMap[S].insert({V, nullptr}); + } +} + /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. 
const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -4134,10 +4145,9 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; - if (checkValidity(S)) - return S; - eraseValueFromMap(V); - forgetMemoizedResults(S); + assert(checkValidity(S) && + "existing SCEV has not been properly invalidated"); + return S; } return nullptr; } @@ -4430,44 +4440,6 @@ static void PushDefUseChildren(Instruction *I, } } -void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) { - SmallVector<Instruction *, 16> Worklist; - SmallPtrSet<Instruction *, 8> Visited; - SmallVector<const SCEV *, 8> ToForget; - Visited.insert(PN); - Worklist.push_back(PN); - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - - auto It = ValueExprMap.find_as(static_cast<Value *>(I)); - if (It != ValueExprMap.end()) { - const SCEV *Old = It->second; - - // Short-circuit the def-use traversal if the symbolic name - // ceases to appear in expressions. - if (Old != SymName && !hasOperand(Old, SymName)) - continue; - - // SCEVUnknown for a PHI either means that it has an unrecognized - // structure, it's a PHI that's in the progress of being computed - // by createNodeForPHI, or it's a single-value PHI. In the first case, - // additional loop trip count information isn't going to change anything. - // In the second case, createNodeForPHI will perform the necessary - // updates on its own when it gets to that point. In the third, we do - // want to forget the SCEVUnknown. - if (!isa<PHINode>(I) || - !isa<SCEVUnknown>(Old) || - (I != PN && Old == SymName)) { - eraseValueFromMap(It->first); - ToForget.push_back(Old); - } - } - - PushDefUseChildren(I, Worklist, Visited); - } - forgetMemoizedResults(ToForget); -} - namespace { /// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start @@ -5335,15 +5307,17 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + insertValueToMap(PN, PHISCEV); // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to // overflow. - if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) - if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) + if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) { + assert(isLoopInvariant(Accum, L) && + "Accum is defined outside L, but is not invariant?"); + if (isAddRecNeverPoison(BEInst, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + } return PHISCEV; } @@ -5386,7 +5360,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // Handle PHI node value symbolically. const SCEV *SymbolicName = getUnknown(PN); - ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); + insertValueToMap(PN, SymbolicName); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. @@ -5457,8 +5431,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. 
- forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + forgetMemoizedResults(SymbolicName); + insertValueToMap(PN, PHISCEV); // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to @@ -5489,8 +5463,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. - forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + forgetMemoizedResults(SymbolicName); + insertValueToMap(PN, Shifted); return Shifted; } } @@ -7598,62 +7572,19 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only // conservative estimates made without the benefit of trip count - // information. This is similar to the code in forgetLoop, except that - // it handles SCEVUnknown PHI nodes specially. + // information. This invalidation is not necessary for correctness, and is + // only done to produce more precise results. if (Result.hasAnyInfo()) { - SmallVector<Instruction *, 16> Worklist; - SmallPtrSet<Instruction *, 8> Discovered; + // Invalidate any expression using an addrec in this loop. SmallVector<const SCEV *, 8> ToForget; - PushLoopPHIs(L, Worklist, Discovered); - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - - ValueExprMapType::iterator It = - ValueExprMap.find_as(static_cast<Value *>(I)); - if (It != ValueExprMap.end()) { - const SCEV *Old = It->second; - - // SCEVUnknown for a PHI either means that it has an unrecognized - // structure, or it's a PHI that's in the progress of being computed - // by createNodeForPHI. In the former case, additional loop trip - // count information isn't going to change anything. In the later - // case, createNodeForPHI will perform the necessary updates on its - // own when it gets to that point. - if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) { - eraseValueFromMap(It->first); - ToForget.push_back(Old); - } - if (PHINode *PN = dyn_cast<PHINode>(I)) - ConstantEvolutionLoopExitValue.erase(PN); - } - - // Since we don't need to invalidate anything for correctness and we're - // only invalidating to make SCEV's results more precise, we get to stop - // early to avoid invalidating too much. This is especially important in - // cases like: - // - // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node - // loop0: - // %pn0 = phi - // ... - // loop1: - // %pn1 = phi - // ... - // - // where both loop0 and loop1's backedge taken count uses the SCEV - // expression for %v. If we don't have the early stop below then in cases - // like the above, getBackedgeTakenInfo(loop1) will clear out the trip - // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip - // count for loop1, effectively nullifying SCEV's trip count cache. - for (auto *U : I->users()) - if (auto *I = dyn_cast<Instruction>(U)) { - auto *LoopForUser = LI.getLoopFor(I->getParent()); - if (LoopForUser && L->contains(LoopForUser) && - Discovered.insert(I).second) - Worklist.push_back(I); - } - } + auto LoopUsersIt = LoopUsers.find(L); + if (LoopUsersIt != LoopUsers.end()) + append_range(ToForget, LoopUsersIt->second); forgetMemoizedResults(ToForget); + + // Invalidate constant-evolved loop header phis. 
+ for (PHINode &PN : L->getHeader()->phis()) + ConstantEvolutionLoopExitValue.erase(&PN); } // Re-lookup the insert position, since the call to @@ -7672,10 +7603,12 @@ void ScalarEvolution::forgetAllLoops() { // result. BackedgeTakenCounts.clear(); PredicatedBackedgeTakenCounts.clear(); + BECountUsers.clear(); LoopPropertiesCache.clear(); ConstantEvolutionLoopExitValue.clear(); ValueExprMap.clear(); ValuesAtScopes.clear(); + ValuesAtScopesUsers.clear(); LoopDispositions.clear(); BlockDispositions.clear(); UnsignedRanges.clear(); @@ -7697,8 +7630,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { auto *CurrL = LoopWorklist.pop_back_val(); // Drop any stored trip count value. - BackedgeTakenCounts.erase(CurrL); - PredicatedBackedgeTakenCounts.erase(CurrL); + forgetBackedgeTakenCounts(CurrL, /* Predicated */ false); + forgetBackedgeTakenCounts(CurrL, /* Predicated */ true); // Drop information about predicated SCEV rewrites for this loop. for (auto I = PredicatedSCEVRewrites.begin(); @@ -7872,10 +7805,6 @@ bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero( return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue); } -bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S) const { - return Operands.contains(S); -} - ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) : ExitLimit(E, E, false, None) { } @@ -7916,19 +7845,6 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, : ExitLimit(E, M, MaxOrZero, None) { } -class SCEVRecordOperands { - SmallPtrSetImpl<const SCEV *> &Operands; - -public: - SCEVRecordOperands(SmallPtrSetImpl<const SCEV *> &Operands) - : Operands(Operands) {} - bool follow(const SCEV *S) { - Operands.insert(S); - return true; - } - bool isDone() { return false; } -}; - /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -7957,14 +7873,6 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( assert((isa<SCEVCouldNotCompute>(ConstantMax) || isa<SCEVConstant>(ConstantMax)) && "No point in having a non-constant max backedge taken count!"); - - SCEVRecordOperands RecordOperands(Operands); - SCEVTraversal<SCEVRecordOperands> ST(RecordOperands); - if (!isa<SCEVCouldNotCompute>(ConstantMax)) - ST.visitAll(ConstantMax); - for (auto &ENT : ExitNotTaken) - if (!isa<SCEVCouldNotCompute>(ENT.ExactNotTaken)) - ST.visitAll(ENT.ExactNotTaken); } /// Compute the number of times the backedge of the specified loop will execute. @@ -8046,6 +7954,13 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, // The loop backedge will be taken the maximum or zero times if there's // a single exit that must be taken the maximum or zero times. bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1); + + // Remember which SCEVs are used in exit limits for invalidation purposes. + // We only care about non-constant SCEVs here, so we can ignore EL.MaxNotTaken + // and MaxBECount, which must be SCEVConstant. 
+ for (const auto &Pair : ExitCounts) + if (!isa<SCEVConstant>(Pair.second.ExactNotTaken)) + BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates}); return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount, MaxBECount, MaxOrZero); } @@ -8916,6 +8831,9 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { LS.second = C; break; } + + if (!isa<SCEVConstant>(C)) + ValuesAtScopesUsers[C].push_back({L, V}); return C; } @@ -12387,7 +12305,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, if (Range.contains(Val->getValue())) return SE.getCouldNotCompute(); // Something strange happened - // Ensure that the previous value is in the range. This is a sanity check. + // Ensure that the previous value is in the range. assert(Range.contains( EvaluateConstantChrecAtConstant(this, ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && @@ -12531,9 +12449,11 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), + BECountUsers(std::move(Arg.BECountUsers)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), + ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)), LoopDispositions(std::move(Arg.LoopDispositions)), LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)), BlockDispositions(std::move(Arg.BlockDispositions)), @@ -12946,6 +12866,23 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; }); } +void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L, + bool Predicated) { + auto &BECounts = + Predicated ? 
PredicatedBackedgeTakenCounts : BackedgeTakenCounts; + auto It = BECounts.find(L); + if (It != BECounts.end()) { + for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) { + if (!isa<SCEVConstant>(ENT.ExactNotTaken)) { + auto UserIt = BECountUsers.find(ENT.ExactNotTaken); + assert(UserIt != BECountUsers.end()); + UserIt->second.erase({L, Predicated}); + } + } + BECounts.erase(It); + } +} + void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) { SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end()); SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end()); @@ -12970,32 +12907,52 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) { else ++I; } - - auto RemoveSCEVFromBackedgeMap = [&ToForget]( - DenseMap<const Loop *, BackedgeTakenInfo> &Map) { - for (auto I = Map.begin(), E = Map.end(); I != E;) { - BackedgeTakenInfo &BEInfo = I->second; - if (any_of(ToForget, - [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); })) - Map.erase(I++); - else - ++I; - } - }; - - RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); - RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) { - ValuesAtScopes.erase(S); LoopDispositions.erase(S); BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); - ExprValueMap.erase(S); HasRecMap.erase(S); MinTrailingZerosCache.erase(S); + + auto ExprIt = ExprValueMap.find(S); + if (ExprIt != ExprValueMap.end()) { + for (auto &ValueAndOffset : ExprIt->second) { + if (ValueAndOffset.second == nullptr) { + auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first); + if (ValueIt != ValueExprMap.end()) + ValueExprMap.erase(ValueIt); + } + } + ExprValueMap.erase(ExprIt); + } + + auto ScopeIt = ValuesAtScopes.find(S); + if (ScopeIt != ValuesAtScopes.end()) { + for (const auto &Pair : ScopeIt->second) + if (!isa_and_nonnull<SCEVConstant>(Pair.second)) + erase_value(ValuesAtScopesUsers[Pair.second], + std::make_pair(Pair.first, S)); + ValuesAtScopes.erase(ScopeIt); + } + + auto ScopeUserIt = ValuesAtScopesUsers.find(S); + if (ScopeUserIt != ValuesAtScopesUsers.end()) { + for (const auto &Pair : ScopeUserIt->second) + erase_value(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S)); + ValuesAtScopesUsers.erase(ScopeUserIt); + } + + auto BEUsersIt = BECountUsers.find(S); + if (BEUsersIt != BECountUsers.end()) { + // Work on a copy, as forgetBackedgeTakenCounts() will modify the original. + auto Copy = BEUsersIt->second; + for (const auto &Pair : Copy) + forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt()); + BECountUsers.erase(BEUsersIt); + } } void @@ -13100,16 +13057,43 @@ void ScalarEvolution::verify() const { ValidLoops.insert(L); Worklist.append(L->begin(), L->end()); } - // Check for SCEV expressions referencing invalid/deleted loops. for (auto &KV : ValueExprMap) { - auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second); - if (!AR) - continue; - assert(ValidLoops.contains(AR->getLoop()) && - "AddRec references invalid loop"); + // Check for SCEV expressions referencing invalid/deleted loops. + if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) { + assert(ValidLoops.contains(AR->getLoop()) && + "AddRec references invalid loop"); + } + + // Check that the value is also part of the reverse map. 
+ auto It = ExprValueMap.find(KV.second); + if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) { + dbgs() << "Value " << *KV.first + << " is in ValueExprMap but not in ExprValueMap\n"; + std::abort(); + } + } + + for (const auto &KV : ExprValueMap) { + for (const auto &ValueAndOffset : KV.second) { + if (ValueAndOffset.second != nullptr) + continue; + + auto It = ValueExprMap.find_as(ValueAndOffset.first); + if (It == ValueExprMap.end()) { + dbgs() << "Value " << *ValueAndOffset.first + << " is in ExprValueMap but not in ValueExprMap\n"; + std::abort(); + } + if (It->second != KV.first) { + dbgs() << "Value " << *ValueAndOffset.first + << " mapped to " << *It->second + << " rather than " << *KV.first << "\n"; + std::abort(); + } + } } - // Verify intergity of SCEV users. + // Verify integrity of SCEV users. for (const auto &S : UniqueSCEVs) { SmallVector<const SCEV *, 4> Ops; collectUniqueOps(&S, Ops); @@ -13125,6 +13109,61 @@ void ScalarEvolution::verify() const { std::abort(); } } + + // Verify integrity of ValuesAtScopes users. + for (const auto &ValueAndVec : ValuesAtScopes) { + const SCEV *Value = ValueAndVec.first; + for (const auto &LoopAndValueAtScope : ValueAndVec.second) { + const Loop *L = LoopAndValueAtScope.first; + const SCEV *ValueAtScope = LoopAndValueAtScope.second; + if (!isa<SCEVConstant>(ValueAtScope)) { + auto It = ValuesAtScopesUsers.find(ValueAtScope); + if (It != ValuesAtScopesUsers.end() && + is_contained(It->second, std::make_pair(L, Value))) + continue; + dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: " + << ValueAtScope << " missing in ValuesAtScopesUsers\n"; + std::abort(); + } + } + } + + for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) { + const SCEV *ValueAtScope = ValueAtScopeAndVec.first; + for (const auto &LoopAndValue : ValueAtScopeAndVec.second) { + const Loop *L = LoopAndValue.first; + const SCEV *Value = LoopAndValue.second; + assert(!isa<SCEVConstant>(Value)); + auto It = ValuesAtScopes.find(Value); + if (It != ValuesAtScopes.end() && + is_contained(It->second, std::make_pair(L, ValueAtScope))) + continue; + dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: " + << ValueAtScope << " missing in ValuesAtScopes\n"; + std::abort(); + } + } + + // Verify integrity of BECountUsers. + auto VerifyBECountUsers = [&](bool Predicated) { + auto &BECounts = + Predicated ? 
PredicatedBackedgeTakenCounts : BackedgeTakenCounts; + for (const auto &LoopAndBEInfo : BECounts) { + for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) { + if (!isa<SCEVConstant>(ENT.ExactNotTaken)) { + auto UserIt = BECountUsers.find(ENT.ExactNotTaken); + if (UserIt != BECountUsers.end() && + UserIt->second.contains({ LoopAndBEInfo.first, Predicated })) + continue; + dbgs() << "Value " << *ENT.ExactNotTaken << " for loop " + << *LoopAndBEInfo.first << " missing from BECountUsers\n"; + std::abort(); + } + } + } + }; + VerifyBECountUsers(/* Predicated */ false); + VerifyBECountUsers(/* Predicated */ true); } bool ScalarEvolution::invalidate( diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 74cc39b7f2c0..54f3605ee033 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -14,12 +14,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ModuleSummaryIndex.h" @@ -117,7 +119,7 @@ template <typename CalleeTy> struct UseInfo { // Access range if the address (alloca or parameters). // It is allowed to be empty-set when there are no known accesses. ConstantRange Range; - std::map<const Instruction *, ConstantRange> Accesses; + std::set<const Instruction *> UnsafeAccesses; // List of calls which pass address as an argument. 
// Value is offset range of address from base address (alloca or calling @@ -131,10 +133,9 @@ template <typename CalleeTy> struct UseInfo { UseInfo(unsigned PointerSize) : Range{PointerSize, false} {} void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); } - void addRange(const Instruction *I, const ConstantRange &R) { - auto Ins = Accesses.emplace(I, R); - if (!Ins.second) - Ins.first->second = unionNoWrap(Ins.first->second, R); + void addRange(const Instruction *I, const ConstantRange &R, bool IsSafe) { + if (!IsSafe) + UnsafeAccesses.insert(I); updateRange(R); } }; @@ -230,7 +231,7 @@ struct StackSafetyInfo::InfoTy { struct StackSafetyGlobalInfo::InfoTy { GVToSSI Info; SmallPtrSet<const AllocaInst *, 8> SafeAllocas; - std::map<const Instruction *, bool> AccessIsUnsafe; + std::set<const Instruction *> UnsafeAccesses; }; namespace { @@ -253,6 +254,11 @@ class StackSafetyLocalAnalysis { void analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS, const StackLifetime &SL); + + bool isSafeAccess(const Use &U, AllocaInst *AI, const SCEV *AccessSize); + bool isSafeAccess(const Use &U, AllocaInst *AI, Value *V); + bool isSafeAccess(const Use &U, AllocaInst *AI, TypeSize AccessSize); + public: StackSafetyLocalAnalysis(Function &F, ScalarEvolution &SE) : F(F), DL(F.getParent()->getDataLayout()), SE(SE), @@ -333,6 +339,56 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange( return getAccessRange(U, Base, SizeRange); } +bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI, + Value *V) { + return isSafeAccess(U, AI, SE.getSCEV(V)); +} + +bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI, + TypeSize TS) { + if (TS.isScalable()) + return false; + auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize); + const SCEV *SV = SE.getConstant(CalculationTy, TS.getFixedSize()); + return isSafeAccess(U, AI, SV); +} + +bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI, + const SCEV *AccessSize) { + + if (!AI) + return true; + if (isa<SCEVCouldNotCompute>(AccessSize)) + return false; + + const auto *I = cast<Instruction>(U.getUser()); + + auto ToCharPtr = [&](const SCEV *V) { + auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext()); + return SE.getTruncateOrZeroExtend(V, PtrTy); + }; + + const SCEV *AddrExp = ToCharPtr(SE.getSCEV(U.get())); + const SCEV *BaseExp = ToCharPtr(SE.getSCEV(AI)); + const SCEV *Diff = SE.getMinusSCEV(AddrExp, BaseExp); + if (isa<SCEVCouldNotCompute>(Diff)) + return false; + + auto Size = getStaticAllocaSizeRange(*AI); + + auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize); + auto ToDiffTy = [&](const SCEV *V) { + return SE.getTruncateOrZeroExtend(V, CalculationTy); + }; + const SCEV *Min = ToDiffTy(SE.getConstant(Size.getLower())); + const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())), + ToDiffTy(AccessSize)); + return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I) + .getValueOr(false) && + SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I) + .getValueOr(false); +} + /// The function analyzes all local uses of Ptr (alloca or argument) and /// calculates local access range and all function calls where it was used. 
void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, @@ -341,7 +397,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, SmallPtrSet<const Value *, 16> Visited; SmallVector<const Value *, 8> WorkList; WorkList.push_back(Ptr); - const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr); + AllocaInst *AI = dyn_cast<AllocaInst>(Ptr); // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. while (!WorkList.empty()) { @@ -356,11 +412,13 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, switch (I->getOpcode()) { case Instruction::Load: { if (AI && !SL.isAliveAfter(AI, I)) { - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } - US.addRange(I, - getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType()))); + auto TypeSize = DL.getTypeStoreSize(I->getType()); + auto AccessRange = getAccessRange(UI, Ptr, TypeSize); + bool Safe = isSafeAccess(UI, AI, TypeSize); + US.addRange(I, AccessRange, Safe); break; } @@ -370,16 +428,17 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, case Instruction::Store: { if (V == I->getOperand(0)) { // Stored the pointer - conservatively assume it may be unsafe. - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } if (AI && !SL.isAliveAfter(AI, I)) { - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } - US.addRange( - I, getAccessRange( - UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType()))); + auto TypeSize = DL.getTypeStoreSize(I->getOperand(0)->getType()); + auto AccessRange = getAccessRange(UI, Ptr, TypeSize); + bool Safe = isSafeAccess(UI, AI, TypeSize); + US.addRange(I, AccessRange, Safe); break; } @@ -387,7 +446,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, // Information leak. // FIXME: Process parameters correctly. This is a leak only if we return // alloca. 
- US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; case Instruction::Call: @@ -396,12 +455,20 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, break; if (AI && !SL.isAliveAfter(AI, I)) { - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } - if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { - US.addRange(I, getMemIntrinsicAccessRange(MI, UI, Ptr)); + auto AccessRange = getMemIntrinsicAccessRange(MI, UI, Ptr); + bool Safe = false; + if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) { + if (MTI->getRawSource() != UI && MTI->getRawDest() != UI) + Safe = true; + } else if (MI->getRawDest() != UI) { + Safe = true; + } + Safe = Safe || isSafeAccess(UI, AI, MI->getLength()); + US.addRange(I, AccessRange, Safe); break; } @@ -412,15 +479,16 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, } if (!CB.isArgOperand(&UI)) { - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } unsigned ArgNo = CB.getArgOperandNo(&UI); if (CB.isByValArgument(ArgNo)) { - US.addRange(I, getAccessRange( - UI, Ptr, - DL.getTypeStoreSize(CB.getParamByValType(ArgNo)))); + auto TypeSize = DL.getTypeStoreSize(CB.getParamByValType(ArgNo)); + auto AccessRange = getAccessRange(UI, Ptr, TypeSize); + bool Safe = isSafeAccess(UI, AI, TypeSize); + US.addRange(I, AccessRange, Safe); break; } @@ -430,7 +498,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, const GlobalValue *Callee = dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts()); if (!Callee) { - US.addRange(I, UnknownRange); + US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } @@ -827,8 +895,8 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const { Info->SafeAllocas.insert(AI); ++NumAllocaStackSafe; } - for (const auto &A : KV.second.Accesses) - Info->AccessIsUnsafe[A.first] |= !AIRange.contains(A.second); + Info->UnsafeAccesses.insert(KV.second.UnsafeAccesses.begin(), + KV.second.UnsafeAccesses.end()); } } @@ -903,11 +971,7 @@ bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const { bool StackSafetyGlobalInfo::stackAccessIsSafe(const Instruction &I) const { const auto &Info = getInfo(); - auto It = Info.AccessIsUnsafe.find(&I); - if (It == Info.AccessIsUnsafe.end()) { - return true; - } - return !It->second; + return Info.UnsafeAccesses.find(&I) == Info.UnsafeAccesses.end(); } void StackSafetyGlobalInfo::print(raw_ostream &O) const { diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp index 59582cd3a198..ff833b55bbce 100644 --- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp @@ -15,21 +15,18 @@ // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model // control-induced divergence in phi nodes. // -// -- Summary -- -// The SyncDependenceAnalysis lazily computes sync dependences [3]. -// The analysis evaluates the disjoint path criterion [2] by a reduction -// to SSA construction. The SSA construction algorithm is implemented as -// a simple data-flow analysis [1]. 
// -// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy -// [2] "Efficiently Computing Static Single Assignment Form -// and the Control Dependence Graph", TOPLAS '91, -// Cytron, Ferrante, Rosen, Wegman and Zadeck -// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack -// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira +// -- Reference -- +// The algorithm is presented in Section 5 of +// +// An abstract interpretation for SPMD divergence +// on reducible control flow graphs. +// Julian Rosemann, Simon Moll and Sebastian Hack +// POPL '21 +// // // -- Sync dependence -- -// Sync dependence [4] characterizes the control flow aspect of the +// Sync dependence characterizes the control flow aspect of the // propagation of branch divergence. For example, // // %cond = icmp slt i32 %tid, 10 @@ -46,9 +43,10 @@ // because the branch "br i1 %cond" depends on %tid and affects which value %a // is assigned to. // +// // -- Reduction to SSA construction -- // There are two disjoint paths from A to X, if a certain variant of SSA -// construction places a phi node in X under the following set-up scheme [2]. +// construction places a phi node in X under the following set-up scheme. // // This variant of SSA construction ignores incoming undef values. // That is paths from the entry without a definition do not result in @@ -63,6 +61,7 @@ // D E // \ / // F +// // Assume that A contains a divergent branch. We are interested // in the set of all blocks where each block is reachable from A // via two disjoint paths. This would be the set {D, F} in this @@ -70,6 +69,7 @@ // To generally reduce this query to SSA construction we introduce // a virtual variable x and assign to x different values in each // successor block of A. +// // entry // / \ // A \ @@ -79,23 +79,41 @@ // D E // \ / // F +// // Our flavor of SSA construction for x will construct the following +// // entry // / \ // A \ // / \ Y // x0 = 0 x1 = 1 / // \ / \ / -// x2=phi E +// x2 = phi E // \ / -// x3=phi +// x3 = phi +// // The blocks D and F contain phi nodes and are thus each reachable // by two disjoins paths from A. // // -- Remarks -- -// In case of loop exits we need to check the disjoint path criterion for loops -// [2]. To this end, we check whether the definition of x differs between the -// loop exit and the loop header (_after_ SSA construction). +// * In case of loop exits we need to check the disjoint path criterion for loops. +// To this end, we check whether the definition of x differs between the +// loop exit and the loop header (_after_ SSA construction). +// +// -- Known Limitations & Future Work -- +// * The algorithm requires reducible loops because the implementation +// implicitly performs a single iteration of the underlying data flow analysis. +// This was done for pragmatism, simplicity and speed. +// +// Relevant related work for extending the algorithm to irreducible control: +// A simple algorithm for global data flow analysis problems. +// Matthew S. Hecht and Jeffrey D. Ullman. +// SIAM Journal on Computing, 4(4):519–532, December 1975. +// +// * Another reason for requiring reducible loops is that points of +// synchronization in irreducible loops aren't 'obvious' - there is no unique +// header where threads 'should' synchronize when entering or coming back +// around from the latch. 
// //===----------------------------------------------------------------------===// #include "llvm/Analysis/SyncDependenceAnalysis.h" @@ -128,8 +146,9 @@ using namespace llvm; // // We cannot use the vanilla (R)PO computation of LLVM because: // * We (virtually) modify the CFG. -// * We want a loop-compact block enumeration, that is the numbers assigned by -// the traveral to the blocks of a loop are an interval. +// * We want a loop-compact block enumeration, that is the numbers assigned to +// blocks of a loop form an interval +// using POCB = std::function<void(const BasicBlock &)>; using VisitedSet = std::set<const BasicBlock *>; using BlockStack = std::vector<const BasicBlock *>; diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 7326ba74c071..72fbd5ad3f68 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -166,8 +166,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, return; } - // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. - // All versions of watchOS support it. + // memset_pattern{4,8,16} is only available on iOS 3.0 and Mac OS X 10.5 and + // later. All versions of watchOS support it. if (T.isMacOSX()) { // available IO unlocked variants on Mac OS X TLI.setAvailable(LibFunc_getc_unlocked); @@ -175,12 +175,20 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setAvailable(LibFunc_putc_unlocked); TLI.setAvailable(LibFunc_putchar_unlocked); - if (T.isMacOSXVersionLT(10, 5)) + if (T.isMacOSXVersionLT(10, 5)) { + TLI.setUnavailable(LibFunc_memset_pattern4); + TLI.setUnavailable(LibFunc_memset_pattern8); TLI.setUnavailable(LibFunc_memset_pattern16); + } } else if (T.isiOS()) { - if (T.isOSVersionLT(3, 0)) + if (T.isOSVersionLT(3, 0)) { + TLI.setUnavailable(LibFunc_memset_pattern4); + TLI.setUnavailable(LibFunc_memset_pattern8); TLI.setUnavailable(LibFunc_memset_pattern16); + } } else if (!T.isWatchOS()) { + TLI.setUnavailable(LibFunc_memset_pattern4); + TLI.setUnavailable(LibFunc_memset_pattern8); TLI.setUnavailable(LibFunc_memset_pattern16); } @@ -684,7 +692,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_strcat_chk); TLI.setUnavailable(LibFunc_strcpy_chk); TLI.setUnavailable(LibFunc_strlcat_chk); - TLI.setUnavailable(LibFunc_strlcat_chk); TLI.setUnavailable(LibFunc_strlcpy_chk); TLI.setUnavailable(LibFunc_strlen_chk); TLI.setUnavailable(LibFunc_strncat_chk); @@ -1523,6 +1530,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(2)->isPointerTy() && FTy.getParamType(3)->isIntegerTy()); + case LibFunc_memset_pattern4: + case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && FTy.getParamType(0)->isPointerTy() && diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp index 8a34a34eb307..7573975a3dd3 100644 --- a/llvm/lib/Analysis/VFABIDemangling.cpp +++ b/llvm/lib/Analysis/VFABIDemangling.cpp @@ -445,7 +445,6 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName, VF = EC.getKnownMinValue(); } - // Sanity checks. // 1. We don't accept a zero lanes vectorization factor. // 2. We don't accept the demangling if the vector function is not // present in the module. 
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5bce1eaa59a0..5feabd876e3a 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -124,8 +124,8 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) { std::make_pair(I.first, std::make_pair(I.second, LocTy()))); } -/// validateEndOfModule - Do final validity and sanity checks at the end of the -/// module. +/// validateEndOfModule - Do final validity and basic correctness checks at the +/// end of the module. bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { if (!M) return false; @@ -271,7 +271,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { return false; } -/// Do final validity and sanity checks at the end of the index. +/// Do final validity and basic correctness checks at the end of the index. bool LLParser::validateEndOfIndex() { if (!Index) return false; @@ -2989,9 +2989,10 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name, /// parseValID - parse an abstract value that doesn't necessarily have a /// type implied. For example, if we parse "4" we don't know what integer type /// it has. The value will later be combined with its type and checked for -/// sanity. PFS is used to convert function-local operands of metadata (since -/// metadata operands are not just parsed here but also converted to values). -/// PFS can be null when we are not parsing metadata values inside a function. +/// basic correctness. PFS is used to convert function-local operands of +/// metadata (since metadata operands are not just parsed here but also +/// converted to values). PFS can be null when we are not parsing metadata +/// values inside a function. bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) { ID.Loc = Lex.getLoc(); switch (Lex.getKind()) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp index 2723105b092f..d7bcb0d7f575 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -957,8 +957,8 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, O->OS.write_escaped(Blob, /*hex=*/true) << "'"; } else { bool BlobIsPrintable = true; - for (unsigned i = 0, e = Blob.size(); i != e; ++i) - if (!isPrint(static_cast<unsigned char>(Blob[i]))) { + for (char C : Blob) + if (!isPrint(static_cast<unsigned char>(C))) { BlobIsPrintable = false; break; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index c568461e62b0..993cb1de8c02 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3996,8 +3996,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // See if anything took the address of blocks in this function. auto BBFRI = BasicBlockFwdRefs.find(F); if (BBFRI == BasicBlockFwdRefs.end()) { - for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) - FunctionBBs[i] = BasicBlock::Create(Context, "", F); + for (BasicBlock *&BB : FunctionBBs) + BB = BasicBlock::Create(Context, "", F); } else { auto &BBRefs = BBFRI->second; // Check for invalid basic block references. 
@@ -4605,9 +4605,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { CaseVals.push_back(ConstantInt::get(Context, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); - for (SmallVector<ConstantInt*, 1>::iterator cvi = CaseVals.begin(), - cve = CaseVals.end(); cvi != cve; ++cvi) - SI->addCase(*cvi, DestBB); + for (ConstantInt *Cst : CaseVals) + SI->addCase(Cst, DestBB); } I = SI; break; diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 6df5a4a64d51..60530d7f7a00 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -444,7 +444,8 @@ class MetadataLoader::MetadataLoaderImpl { uint64_t GlobalDeclAttachmentPos = 0; #ifndef NDEBUG - /// Sanity check that we end up parsing all of the global decl attachments. + /// Baisic correctness check that we end up parsing all of the global decl + /// attachments. unsigned NumGlobalDeclAttachSkipped = 0; unsigned NumGlobalDeclAttachParsed = 0; #endif @@ -917,7 +918,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - // Sanity check that we parsed them all. + // Check that we parsed them all. assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed); return true; case BitstreamEntry::Record: @@ -929,7 +930,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() { return MaybeCode.takeError(); if (MaybeCode.get() != bitc::METADATA_GLOBAL_DECL_ATTACHMENT) { // Anything other than a global decl attachment signals the end of - // these records. sanity check that we parsed them all. + // these records. Check that we parsed them all. assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed); return true; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1e9a9197aed7..e2354c40844a 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -596,10 +596,10 @@ static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, SmallVector<unsigned, 64> Vals; // Code: [strchar x N] - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i])) + for (char C : Str) { + if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(C)) AbbrevToUse = 0; - Vals.push_back(Str[i]); + Vals.push_back(C); } // Emit the finished record. @@ -914,8 +914,7 @@ void ModuleBitcodeWriter::writeTypeTable() { TypeVals.clear(); // Loop over all of the types, emitting each in turn. - for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { - Type *T = TypeList[i]; + for (Type *T : TypeList) { int AbbrevToUse = 0; unsigned Code = 0; @@ -3343,19 +3342,18 @@ void ModuleBitcodeWriter::writeFunction( DILocation *LastDL = nullptr; // Finally, emit all the instructions, in order. - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - writeInstruction(*I, InstID, Vals); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) { + writeInstruction(I, InstID, Vals); - if (!I->getType()->isVoidTy()) + if (!I.getType()->isVoidTy()) ++InstID; // If the instruction has metadata, write a metadata attachment later. 
- NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); + NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. - DILocation *DL = I->getDebugLoc(); + DILocation *DL = I.getDebugLoc(); if (!DL) continue; @@ -4429,9 +4427,9 @@ void ModuleBitcodeWriter::write() { // Emit function bodies. DenseMap<const Function *, uint64_t> FunctionToBitcodeIndex; - for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F) - if (!F->isDeclaration()) - writeFunction(*F, FunctionToBitcodeIndex); + for (const Function &F : M) + if (!F.isDeclaration()) + writeFunction(F, FunctionToBitcodeIndex); // Need to write after the above call to WriteFunction which populates // the summary information in the index. diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 9465a3b11c8f..07e0708e68c3 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -1148,8 +1148,8 @@ void ValueEnumerator::purgeFunction() { ValueMap.erase(Values[i].first); for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i) MetadataMap.erase(MDs[i]); - for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) - ValueMap.erase(BasicBlocks[i]); + for (const BasicBlock *BB : BasicBlocks) + ValueMap.erase(BB); Values.resize(NumModuleValues); MDs.resize(NumModuleMDs); diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 87a3cede601b..5984063627b0 100644 --- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -354,8 +354,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -407,8 +406,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Scan the register defs for this instruction and update // live-ranges. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -495,8 +493,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, LLVM_DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -762,11 +759,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // ...need a map from MI to SUnit. std::map<MachineInstr *, const SUnit *> MISUnitMap; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; - MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), - SU)); - } + for (const SUnit &SU : SUnits) + MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU)); // Track progress along the critical path through the SUnit graph as // we walk the instructions. 
This is needed for regclasses that only @@ -774,12 +768,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( const SUnit *CriticalPathSU = nullptr; MachineInstr *CriticalPathMI = nullptr; if (CriticalPathSet.any()) { - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; + for (const SUnit &SU : SUnits) { if (!CriticalPathSU || - ((SU->getDepth() + SU->Latency) > + ((SU.getDepth() + SU.Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { - CriticalPathSU = SU; + CriticalPathSU = &SU; } } assert(CriticalPathSU && "Failed to find SUnit critical path"); @@ -839,8 +832,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // but don't cause any anti-dependence breaking themselves) if (!MI.isKill()) { // Attempt to break each anti-dependency... - for (unsigned i = 0, e = Edges.size(); i != e; ++i) { - const SDep *Edge = Edges[i]; + for (const SDep *Edge : Edges) { SUnit *NextSU = Edge->getSUnit(); if ((Edge->getKind() != SDep::Anti) && diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index cc848d28a9a7..828cb760b82e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -809,9 +809,9 @@ void AsmPrinter::emitFunctionHeader() { // so that we don't get references to undefined symbols. std::vector<MCSymbol*> DeadBlockSyms; MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); - for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + for (MCSymbol *DeadBlockSym : DeadBlockSyms) { OutStreamer->AddComment("Address taken block that was later removed"); - OutStreamer->emitLabel(DeadBlockSyms[i]); + OutStreamer->emitLabel(DeadBlockSym); } if (CurrentFnBegin) { @@ -910,8 +910,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str; raw_string_ostream OS(Str); OS << "kill:"; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &Op = MI->getOperand(i); + for (const MachineOperand &Op : MI->operands()) { assert(Op.isReg() && "KILL instruction must have only register operands"); OS << ' ' << (Op.isDef() ? 
"def " : "killed ") << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); @@ -2150,8 +2149,7 @@ void AsmPrinter::emitJumpTableInfo() { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { - const MachineBasicBlock *MBB = JTBBs[ii]; + for (const MachineBasicBlock *MBB : JTBBs) { if (!EmittedSets.insert(MBB).second) continue; @@ -2177,8 +2175,8 @@ void AsmPrinter::emitJumpTableInfo() { MCSymbol* JTISymbol = GetJTISymbol(JTI); OutStreamer->emitLabel(JTISymbol); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) - emitJumpTableEntry(MJTI, JTBBs[ii], JTI); + for (const MachineBasicBlock *MBB : JTBBs) + emitJumpTableEntry(MJTI, MBB, JTI); } if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionEnd); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ef1abc47701a..5d0cadefdbf7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -128,191 +128,29 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, emitInlineAsmEnd(STI, &TAP->getSTI()); } -static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, const MCAsmInfo *MAI, - AsmPrinter *AP, uint64_t LocCookie, - raw_ostream &OS) { - // Switch to the inline assembly variant. - OS << "\t.intel_syntax\n\t"; - - int CurVariant = -1; // The number of the {.|.|.} region we are in. - const char *LastEmitted = AsmStr; // One past the last character emitted. - unsigned NumOperands = MI->getNumOperands(); - int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel. - - while (*LastEmitted) { - switch (*LastEmitted) { - default: { - // Not a special case, emit the string section literally. - const char *LiteralEnd = LastEmitted+1; - while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && - *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') - ++LiteralEnd; - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - OS.write(LastEmitted, LiteralEnd - LastEmitted); - LastEmitted = LiteralEnd; - break; - } - case '\n': - ++LastEmitted; // Consume newline character. - OS << '\n'; // Indent code with newline. - break; - case '$': { - ++LastEmitted; // Consume '$' character. - bool Done = true; - - // Handle escapes. - switch (*LastEmitted) { - default: Done = false; break; - case '$': - ++LastEmitted; // Consume second '$' character. - break; - case '(': // $( -> same as GCC's { character. - ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) - report_fatal_error("Nested variants found in inline asm string: '" + - Twine(AsmStr) + "'"); - CurVariant = 0; // We're in the first variant now. - break; - case '|': - ++LastEmitted; // Consume '|' character. - if (CurVariant == -1) - OS << '|'; // This is gcc's behavior for | outside a variant. - else - ++CurVariant; // We're in the next variant. - break; - case ')': // $) -> same as GCC's } char. - ++LastEmitted; // Consume ')' character. - if (CurVariant == -1) - OS << '}'; // This is gcc's behavior for } outside a variant. - else - CurVariant = -1; - break; - } - if (Done) break; - - bool HasCurlyBraces = false; - if (*LastEmitted == '{') { // ${variable} - ++LastEmitted; // Consume '{' character. 
- HasCurlyBraces = true; - } - - // If we have ${:foo}, then this is not a real operand reference, it is a - // "magic" string reference, just like in .td files. Arrange to call - // PrintSpecial. - if (HasCurlyBraces && *LastEmitted == ':') { - ++LastEmitted; - const char *StrStart = LastEmitted; - const char *StrEnd = strchr(StrStart, '}'); - if (!StrEnd) - report_fatal_error("Unterminated ${:foo} operand in inline asm" - " string: '" + Twine(AsmStr) + "'"); - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart)); - LastEmitted = StrEnd+1; - break; - } - - const char *IDStart = LastEmitted; - const char *IDEnd = IDStart; - while (isDigit(*IDEnd)) - ++IDEnd; - - unsigned Val; - if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) - report_fatal_error("Bad $ operand number in inline asm string: '" + - Twine(AsmStr) + "'"); - LastEmitted = IDEnd; - - if (Val >= NumOperands - 1) - report_fatal_error("Invalid $ operand number in inline asm string: '" + - Twine(AsmStr) + "'"); - - char Modifier[2] = { 0, 0 }; - - if (HasCurlyBraces) { - // If we have curly braces, check for a modifier character. This - // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. - if (*LastEmitted == ':') { - ++LastEmitted; // Consume ':' character. - if (*LastEmitted == 0) - report_fatal_error("Bad ${:} expression in inline asm string: '" + - Twine(AsmStr) + "'"); - - Modifier[0] = *LastEmitted; - ++LastEmitted; // Consume modifier character. - } - - if (*LastEmitted != '}') - report_fatal_error("Bad ${} expression in inline asm string: '" + - Twine(AsmStr) + "'"); - ++LastEmitted; // Consume '}' character. - } - - // Okay, we finally have a value number. Ask the target to print this - // operand! - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = InlineAsm::MIOp_FirstOperand; - - bool Error = false; - - // Scan to find the machine operand number for the operand. - for (; Val; --Val) { - if (OpNo >= MI->getNumOperands()) - break; - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; - } - - // We may have a location metadata attached to the end of the - // instruction, and at no point should see metadata at any - // other point while processing. It's an error if so. - if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) { - Error = true; - } else { - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - ++OpNo; // Skip over the ID number. - - // FIXME: Shouldn't arch-independent output template handling go into - // PrintAsmOperand? - // Labels are target independent. - if (MI->getOperand(OpNo).isBlockAddress()) { - const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); - MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); - Sym->print(OS, AP->MAI); - MMI->getContext().registerInlineAsmLabel(Sym); - } else if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand( - MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); - } else { - Error = AP->PrintAsmOperand(MI, OpNo, - Modifier[0] ? 
Modifier : nullptr, OS); - } - } - if (Error) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "invalid operand in inline asm: '" << AsmStr << "'"; - MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); - } - } - break; - } - } +static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, const MCAsmInfo *MAI, + AsmPrinter *AP, uint64_t LocCookie, + raw_ostream &OS) { + bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel; + + if (InputIsIntelDialect) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; } - OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. -} -static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, const MCAsmInfo *MAI, - AsmPrinter *AP, uint64_t LocCookie, - raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); - if (MAI->getEmitGNUAsmStartIndentationMarker()) + int AsmPrinterVariant; + if (InputIsIntelDialect) + AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel. + else + AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); + + // FIXME: Should this happen for `asm inteldialect` as well? + if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker()) OS << '\t'; while (*LastEmitted) { @@ -340,8 +178,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, switch (*LastEmitted) { default: Done = false; break; case '$': // $$ -> $ - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - OS << '$'; + if (!InputIsIntelDialect) + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; ++LastEmitted; // Consume second '$' character. break; case '(': // $( -> same as GCC's { character. @@ -480,6 +319,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } } } + if (InputIsIntelDialect) + OS << "\n\t.att_syntax"; OS << '\n' << (char)0; // null terminate string. } @@ -515,9 +356,8 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { // it. uint64_t LocCookie = 0; const MDNode *LocMD = nullptr; - for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata() && - (LocMD = MI->getOperand(i-1).getMetadata()) && + for (const MachineOperand &MO : llvm::reverse(MI->operands())) { + if (MO.isMetadata() && (LocMD = MO.getMetadata()) && LocMD->getNumOperands() != 0) { if (const ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { @@ -533,10 +373,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { raw_svector_ostream OS(StringData); AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) - EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); - else - EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); + EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as // that might lead to undefined behaviour. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 922c91840520..0d2736178f0f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -521,8 +521,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { } // Construct a DIE for this scope. -void DwarfCompileUnit::constructScopeDIE( - LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) { +void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, + DIE &ParentScopeDIE) { if (!Scope || !Scope->getScopeNode()) return; @@ -533,46 +533,27 @@ void DwarfCompileUnit::constructScopeDIE( "constructSubprogramScopeDIE for non-inlined " "subprograms"); - SmallVector<DIE *, 8> Children; - - // We try to create the scope DIE first, then the children DIEs. This will - // avoid creating un-used children then removing them later when we find out - // the scope DIE is null. - DIE *ScopeDIE; + // Emit inlined subprograms. if (Scope->getParent() && isa<DISubprogram>(DS)) { - ScopeDIE = constructInlinedScopeDIE(Scope); + DIE *ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; - // We create children when the scope DIE is not null. - createScopeChildrenDIE(Scope, Children); - } else { - // Early exit when we know the scope DIE is going to be null. - if (DD->isLexicalScopeDIENull(Scope)) - return; - - bool HasNonScopeChildren = false; - // We create children here when we know the scope DIE is not going to be - // null and the children will be added to the scope DIE. - createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren); - - // If there are only other scopes as children, put them directly in the - // parent instead, as this scope would serve no purpose. - if (!HasNonScopeChildren) { - FinalChildren.insert(FinalChildren.end(), - std::make_move_iterator(Children.begin()), - std::make_move_iterator(Children.end())); - return; - } - ScopeDIE = constructLexicalScopeDIE(Scope); - assert(ScopeDIE && "Scope DIE should not be null."); + ParentScopeDIE.addChild(ScopeDIE); + createAndAddScopeChildren(Scope, *ScopeDIE); + return; } - // Add children - for (auto &I : Children) - ScopeDIE->addChild(std::move(I)); + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + // Emit lexical blocks. + DIE *ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); - FinalChildren.push_back(std::move(ScopeDIE)); + ParentScopeDIE.addChild(ScopeDIE); + createAndAddScopeChildren(Scope, *ScopeDIE); } void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, @@ -1013,42 +994,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { return Result; } -DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &Children, - bool *HasNonScopeChildren) { - assert(Children.empty()); - DIE *ObjectPointer = nullptr; - - // Emit function arguments (order is significant). - auto Vars = DU->getScopeVariables().lookup(Scope); - for (auto &DV : Vars.Args) - Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); - - // Emit local variables. - auto Locals = sortLocalVars(Vars.Locals); - for (DbgVariable *DV : Locals) - Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); - - // Skip imported directives in gmlt-like data. - if (!includeMinimalInlineScopes()) { - // There is no need to emit empty lexical block DIE. 
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) - Children.push_back( - constructImportedEntityDIE(cast<DIImportedEntity>(IE))); - } - - if (HasNonScopeChildren) - *HasNonScopeChildren = !Children.empty(); - - for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) - Children.push_back(constructLabelDIE(*DL, *Scope)); - - for (LexicalScope *LS : Scope->getChildren()) - constructScopeDIE(LS, Children); - - return ObjectPointer; -} - DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); @@ -1079,13 +1024,48 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE) { - // We create children when the scope DIE is not null. - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + DIE *ObjectPointer = nullptr; + + // Emit function arguments (order is significant). + auto Vars = DU->getScopeVariables().lookup(Scope); + for (auto &DV : Vars.Args) + ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); + + // Emit local variables. + auto Locals = sortLocalVars(Vars.Locals); + for (DbgVariable *DV : Locals) + ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); + + // Emit imported entities (skipped in gmlt-like data). + if (!includeMinimalInlineScopes()) { + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE))); + } + + // Emit labels. + for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) + ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); - // Add children - for (auto &I : Children) - ScopeDIE.addChild(std::move(I)); + // Emit inner lexical scopes. + auto needToEmitLexicalScope = [this](LexicalScope *LS) { + if (isa<DISubprogram>(LS->getScopeNode())) + return true; + auto Vars = DU->getScopeVariables().lookup(LS); + if (!Vars.Args.empty() || !Vars.Locals.empty()) + return true; + if (!includeMinimalInlineScopes() && + !ImportedEntities[LS->getScopeNode()].empty()) + return true; + return false; + }; + for (LexicalScope *LS : Scope->getChildren()) { + // If the lexical block doesn't have non-scope children, skip + // its emission and put its children directly to the parent scope. + if (needToEmitLexicalScope(LS)) + constructScopeDIE(LS, ScopeDIE); + else + createAndAddScopeChildren(LS, ScopeDIE); + } return ObjectPointer; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 6e9261087686..fb03982b5e4a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -191,8 +191,7 @@ public: /// variables. DIE &updateSubprogramScopeDIE(const DISubprogram *SP); - void constructScopeDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &FinalChildren); + void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE); /// A helper function to construct a RangeSpanList for a given /// lexical scope. @@ -220,11 +219,6 @@ public: /// Construct a DIE for the given DbgLabel. DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope); - /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &Children, - bool *HasNonScopeChildren = nullptr); - void createBaseTypeDIEs(); /// Construct a DIE for this subprogram scope. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 150f19324834..39f40b172c1b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -162,9 +162,7 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { bool MarkedNoUnwind = false; bool SawFunc = false; - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); - + for (const MachineOperand &MO : MI->operands()) { if (!MO.isGlobal()) continue; const Function *F = dyn_cast<Function>(MO.getGlobal()); @@ -386,8 +384,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { SmallVector<const LandingPadInfo *, 64> LandingPads; LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); + for (const LandingPadInfo &LPI : PadInfos) + LandingPads.push_back(&LPI); // Order landing pads lexicographically by type id. llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) { diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 5ac8f49a9522..64dadc82b48b 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1013,8 +1013,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // If this is a large problem, avoid visiting the same basic blocks // multiple times. if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); + for (const MergePotentialsElt &Elt : MergePotentials) + TriedMerging.insert(Elt.getBlock()); // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 50825ccf9bac..eda0f37fdeb7 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -513,9 +513,7 @@ bool BranchRelaxation::relaxBranchInstructions() { // Relaxing branches involves creating new basic blocks, so re-eval // end() for termination. - for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) { - MachineBasicBlock &MBB = *I; - + for (MachineBasicBlock &MBB : *MF) { // Empty block? 
MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr(); if (Last == MBB.end()) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index e0e2db9f4725..bbdd8aab502e 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -58,8 +58,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); + initializeMIRAddFSDiscriminatorsPass(Registry); initializeMIRCanonicalizerPass(Registry); initializeMIRNamerPass(Registry); + initializeMIRProfileLoaderPassPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index a1ff02178ffa..3bed81d5841d 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -90,7 +90,7 @@ CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) -CGOPT(bool, ValueTrackingVariableLocations) +CGOPT_EXP(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) CGOPT(bool, DebugStrictDwarf) @@ -534,12 +534,17 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); - Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); Options.DebugStrictDwarf = getDebugStrictDwarf(); Options.LoopAlignment = getAlignLoops(); + if (auto Opt = getExplicitValueTrackingVariableLocations()) + Options.ValueTrackingVariableLocations = *Opt; + else + Options.ValueTrackingVariableLocations = + getDefaultValueTrackingVariableLocations(TheTriple); + Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); @@ -692,3 +697,9 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, for (Function &F : M) setFunctionAttributes(CPU, Features, F); } + +bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) { + if (T.getArch() == llvm::Triple::x86_64) + return true; + return false; +} diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 981f5973fee8..4e98d49206b5 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -370,9 +370,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, // Handle cases in which this instruction defines NewReg. MachineInstr *MI = RefOper->getParent(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &CheckOper = MI->getOperand(i); - + for (const MachineOperand &CheckOper : MI->operands()) { if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) return true; @@ -462,11 +460,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // Find the node at the bottom of the critical path. 
const SUnit *Max = nullptr; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; - MISUnitMap[SU->getInstr()] = SU; - if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) - Max = SU; + for (const SUnit &SU : SUnits) { + MISUnitMap[SU.getInstr()] = &SU; + if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency) + Max = &SU; } assert(Max && "Failed to find bottom of the critical path"); @@ -621,8 +618,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // is invalid. If the instruction defines other registers, // save a list of them so that we don't pick a new register // that overlaps any of them. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index c6c0b79cd7e7..0bb186a02416 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -76,8 +76,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { return false; // Examine each operand. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -87,7 +86,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } else { if (MO.isDead()) { #ifndef NDEBUG - // Sanity check on uses of this dead register. All of them should be + // Basic check on the register. All of them should be // 'undef'. for (auto &U : MRI->use_nodbg_operands(Reg)) assert(U.isUndef() && "'Undef' use on a 'dead' register is found!"); @@ -152,8 +151,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg defs. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -171,8 +169,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg uses, after the defs, in case a physreg is // both defined and used in the same instruction.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isUse()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3a52959d54bf..755b3b844570 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" @@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, Builder.setInstrAndDebugLoc(MI); auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI); NewPhi.addDef(DstReg); - for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) { - auto &MO = MI.getOperand(SrcIdx); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { if (!MO.isReg()) { NewPhi.addMBB(MO.getMBB()); continue; @@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector( unsigned NumElts = DstTy.getNumElements(); SmallBitVector ExtractedElts(NumElts); - for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg), - MRI.use_instr_nodbg_end())) { + for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) { if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) return false; auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI); @@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase( MatchInfo(Builder); } +bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned BitWidth = Ty.getScalarSizeInBits(); + + Register ShlSrc, ShlAmt, LShrSrc, LShrAmt; + unsigned FshOpc = 0; + + // Match (or (shl x, amt), (lshr y, sub(bw, amt))). + if (mi_match( + Dst, MRI, + // m_GOr() handles the commuted version as well. + m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), + m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(LShrAmt)))))) { + FshOpc = TargetOpcode::G_FSHL; + + // Match (or (shl x, sub(bw, amt)), (lshr y, amt)). + } else if (mi_match(Dst, MRI, + m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)), + m_GShl(m_Reg(ShlSrc), + m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(ShlAmt)))))) { + FshOpc = TargetOpcode::G_FSHR; + + } else { + return false; + } + + if (ShlAmt != LShrAmt) + return false; + + LLT AmtTy = MRI.getType(ShlAmt); + if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt}); + }; + return true; +} + /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); - // Check for a constant 2 or a splat of 2 on the RHS. 
- auto RHS = MI.getOperand(3).getReg(); - bool IsVector = MRI.getType(RHS).isVector(); - if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) - return false; - if (IsVector) { - // FIXME: There's no mi_match pattern for this yet. - auto *RHSDef = getDefIgnoringCopies(RHS, MRI); - if (!RHSDef) - return false; - auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); - if (!Splat || *Splat != 2) - return false; - } + + if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2))) + return false; MatchInfo = [=, &MI](MachineIRBuilder &B) { Observer.changingInstr(MI); @@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either +/// due to global flags or MachineInstr flags. +static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { + if (MI.getOpcode() != TargetOpcode::G_FMUL) + return false; + return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract); +} + +static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, + const MachineRegisterInfo &MRI) { + return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()) > + std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()); +} + +bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, + bool &AllowFusionGlobally, + bool &HasFMAD, bool &Aggressive, + bool CanReassociate) { + + auto *MF = MI.getMF(); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + const TargetOptions &Options = MF->getTarget().Options; + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + if (CanReassociate && + !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + return false; + + // Floating-point multiply-add with intermediate rounding. + HasFMAD = (LI && TLI.isFMADLegal(MI, DstType)); + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) && + isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}}); + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return false; + + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath || HasFMAD; + // If the addition is not contractable, do not combine. + if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) + return false; + + Aggressive = TLI.enableAggressiveFMAFusion(DstType); + return true; +} + +bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. 
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), + RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + if (isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(), + LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + MachineInstr *FpExtSrc; + if (mi_match(LHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z) + // Note: Commutes FADD operands. 
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + MachineInstr *FMA = nullptr; + Register Z; + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) { + FMA = LHS; + Z = RHS->getOperand(0).getReg(); + } + // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z)) + else if (RHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) { + Z = LHS->getOperand(0).getReg(); + FMA = RHS; + } + + if (FMA) { + MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg()); + Register X = FMA->getOperand(1).getReg(); + Register Y = FMA->getOperand(2).getReg(); + Register U = FMulMI->getOperand(1).getReg(); + Register V = FMulMI->getOperand(2).getReg(); + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register InnerFMA = MRI.createGenericVirtualRegister(DstTy); + B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z}); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + if (!Aggressive) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + + 
unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // Builds: (fma x, y, (fma (fpext u), (fpext v), z)) + auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X, + Register Y, MachineIRBuilder &B) { + Register FpExtU = B.buildFPExt(DstType, U).getReg(0); + Register FpExtV = B.buildFPExt(DstType, V).getReg(0); + Register InnerFMA = + B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z}) + .getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + + MachineInstr *FMulMI, *FMAMI; + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(), + LHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), X, Y, B); + }; + + return true; + } + } + + // fold (fadd z, (fma x, y, (fpext (fmul u, v))) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (RHS->getOpcode() == PreferredFusedOpcode && + mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(), + RHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd z, (fpext (fma x, y, (fmul u, v))) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), X, Y, B); + }; + return true; + } + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + int FirstMulHasFewerUses = true; + if (isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally) && + hasMoreUses(*LHS, *RHS, MRI)) + FirstMulHasFewerUses = false; + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // fold (fsub (fmul x, y), z) -> (fma x, y, -z) + if (FirstMulHasFewerUses && + (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ}); + }; + return true; + } + // fold (fsub x, (fmul y, z)) -> (fma -y, z, x) + else if ((isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? 
TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegX = + B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegX, FMulMI->getOperand(2).getReg(), NegZ}); + }; + return true; + } + + // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x) + if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) + if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtX = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX, FpExtY, NegZ}); + }; + return true; + } + + // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x) + if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0); + Register FpExtZ = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, FpExtZ, LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register LHSReg = 
MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z, + MachineIRBuilder &B) { + Register FpExtX = B.buildFPExt(DstTy, X).getReg(0); + Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0); + B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z}); + }; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fneg (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + // fold (fsub (fneg (fpext (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FMAReg = MRI.createGenericVirtualRegister(DstTy); + buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), RHSReg, B); + B.buildFNeg(MI.getOperand(0).getReg(), FMAReg); + }; + return true; + } + + // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg, B); + }; + return true; + } + + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c74bec7dfc0d..e09cd26eb0c1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, // FIXME: What does the original arg index mean here? SmallVector<CallLowering::ArgInfo, 3> Args; - for (unsigned i = 1; i < MI.getNumOperands(); i++) - Args.push_back({MI.getOperand(i).getReg(), OpType, 0}); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args); } @@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideDstTy = LLT::scalar(NumMerge * WideSize); // Decompose the original operands if they don't evenly divide. 
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + Register SrcReg = MO.getReg(); if (GCD == SrcSize) { Unmerges.push_back(SrcReg); } else { @@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, // Break into a common type SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + extractGCDType(Parts, GCDTy, MO.getReg()); // Build the requested new merge, padding with undef. LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, @@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, // of that value loaded. This can result in a sequence of loads and stores // mixed types, depending on what the target specifies as good types to use. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); unsigned Size = KnownLen; for (auto CopyTy : MemOps) { // Issuing an unaligned load / store pair that overlaps with the previous @@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register LoadPtr = Src; Register Offset; if (CurrOffset != 0) { - Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + LLT SrcTy = MRI.getType(Src); + Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); // Create the store. - Register StorePtr = - CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + Register StorePtr = Dst; + if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + } MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); Size -= CopyTy.getSizeInBytes(); @@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Apart from that, this loop is pretty much doing the same thing as the // memcpy codegen function. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); SmallVector<Register, 16> LoadVals; for (auto CopyTy : MemOps) { // Construct MMO for the load. @@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Create the load. 
Register LoadPtr = Src; if (CurrOffset != 0) { + LLT SrcTy = MRI.getType(Src); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, Register StorePtr = Dst; if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 1a2102e3ef21..650500c7eb31 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( Register Reg = MI.getOperand(OpIdx).getReg(); const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); - // Sanity check that the target properly implemented getRegBankFromRegClass. + // Check that the target properly implemented getRegBankFromRegClass. assert(RegBank.covers(*RC) && "The mapping of the register bank does not make sense"); return &RegBank; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1a440c064a59..b0b84763e922 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, case TargetOpcode::G_BUILD_VECTOR: { // TODO: Probably should have a recursion depth guard since you could have // bitcasted vector elements. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB)) + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB)) return false; - } return true; } @@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, // Only handle constants since we would need to know if number of leading // zeros is greater than the truncation amount. 
const unsigned BitWidth = Ty.getScalarSizeInBits(); - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + auto Const = getIConstantVRegVal(MO.getReg(), MRI); if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2()) return false; } @@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg, return SplatValAndReg; } -bool isBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - int64_t SplatValue, bool AllowUndef) { - if (auto SplatValAndReg = - getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef)) +} // end anonymous namespace + +bool llvm::isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue)); return false; } -} // end anonymous namespace +bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue, + AllowUndef); +} Optional<int64_t> llvm::getBuildVectorConstantSplat(const MachineInstr &MI, diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index 6c1ce4c1efb0..bbd9006a5d8c 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -399,8 +399,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // having a single global, but is aggressive enough for any other case. if (GlobalMergeIgnoreSingleUse) { BitVector AllGlobals(Globals.size()); - for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { - const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) { if (UGS.UsageCount == 0) continue; if (UGS.Globals.count() > 1) @@ -418,8 +417,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, BitVector PickedGlobals(Globals.size()); bool Changed = false; - for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { - const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) { if (UGS.UsageCount == 0) continue; if (PickedGlobals.anyCommon(UGS.Globals)) diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index e4606daba352..2d38a44d5a33 100644 --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -260,10 +260,12 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { if (DTU) { // If there were multiple indirectbr's, they may have common successors, // but in the dominator tree, we only track unique edges. 
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end()); - Updates.reserve(Updates.size() + UniqueSuccessors.size()); - for (BasicBlock *BB : UniqueSuccessors) - Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessors; + Updates.reserve(Updates.size() + BBs.size()); + for (BasicBlock *BB : BBs) { + if (UniqueSuccessors.insert(BB).second) + Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + } DTU->applyUpdates(Updates); } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 64e1f4351456..fc5ac45752ca 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -274,11 +274,9 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) { } static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) { - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + for (const MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) LIS.getInterval(MO.getReg()); - } } /// isSnippet - Identify if a live interval is a snippet that should be spilled. @@ -583,11 +581,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { if (!ParentVNI) { LLVM_DEBUG(dbgs() << "\tadding <undef> flags: "); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) MO.setIsUndef(); - } LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); return true; } diff --git a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index c3e0553418a5..fab6b8d10a33 100644 --- a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -73,11 +73,9 @@ void LatencyPriorityQueue::push(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &Succ : SU->Succs) + if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) ++NumNodesBlocking; - } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; Queue.push_back(SU); diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a4eb3094612b..cf62b0e5d7e8 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -544,8 +544,7 @@ public: // Re-state the variable location: if there's no replacement then NewLoc // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE // identifying the alternative location will be emitted. - const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr; - DbgValueProperties Properties(Expr, false); + const DbgValueProperties &Properties = ActiveVLocIt->second.Properties; PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties)); // Update machine locations <=> variable locations maps. 
Defer updating @@ -836,6 +835,15 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, unsigned Base = Spill.SpillBase; MIB.addReg(Base); MIB.addImm(0); + + // Being on the stack makes this location indirect; if it was _already_ + // indirect though, we need to add extra indirection. See this test for + // a scenario where this happens: + // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll + if (Properties.Indirect) { + std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; + Expr = DIExpression::append(Expr, Elts); + } } else { // This is a stack location with a weird subregister offset: emit an undef // DBG_VALUE instead. @@ -1288,6 +1296,24 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { } else if (MI.isMetaInstruction()) return; + // We always ignore SP defines on call instructions, they don't actually + // change the value of the stack pointer... except for win32's _chkstk. This + // is rare: filter quickly for the common case (no stack adjustments, not a + // call, etc). If it is a call that modifies SP, recognise the SP register + // defs. + bool CallChangesSP = false; + if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() && + !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data())) + CallChangesSP = true; + + // Test whether we should ignore a def of this register due to it being part + // of the stack pointer. + auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool { + if (CallChangesSP) + return false; + return MI.isCall() && MTracker->SPAliases.count(R); + }; + // Find the regs killed by MI, and find regmasks of preserved regs. // Max out the number of statically allocated elements in `DeadRegs`, as this // prevents fallback to std::set::count() operations. @@ -1298,7 +1324,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Determine whether the operand is a register def. if (MO.isReg() && MO.isDef() && MO.getReg() && Register::isPhysicalRegister(MO.getReg()) && - !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) { + !IgnoreSPAlias(MO.getReg())) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) // FIXME: Can we break out of this loop early if no insertion occurs? @@ -1347,6 +1373,9 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { continue; Register Reg = MTracker->LocIdxToLocID[L.Idx]; + if (IgnoreSPAlias(Reg)) + continue; + for (auto *MO : RegMaskPtrs) if (MO->clobbersPhysReg(Reg)) TTracker->clobberMloc(L.Idx, MI.getIterator(), false); @@ -1628,9 +1657,10 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { /// fragments of that DILocalVariable which overlap. This reduces work during /// the data-flow stage from "Find any overlapping fragments" to "Check if the /// known-to-overlap fragments are present". -/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for +/// \param MI A previously unprocessed debug instruction to analyze for /// fragment usage. void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { + assert(MI.isDebugValue() || MI.isDebugRef()); DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); @@ -1732,7 +1762,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( for (auto &MI : MBB) { process(MI); // Also accumulate fragment map. 
- if (MI.isDebugValue()) + if (MI.isDebugValue() || MI.isDebugRef()) accumulateFragmentMap(MI); // Create a map from the instruction number (if present) to the @@ -2322,15 +2352,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( bool InstrRefBasedLDV::vlocJoin( MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, - SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, DbgValue &LiveIn) { - // To emulate VarLocBasedImpl, process this block if it's not in scope but - // _does_ assign a variable value. No live-ins for this scope are transferred - // in though, so we can return immediately. - if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) - return false; - LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -2466,11 +2489,10 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, // "blocks that are potentially in scope. See comment at start of vlocJoin. SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore; - // Old LiveDebugValues tracks variable locations that come out of blocks - // not in scope, where DBG_VALUEs occur. This is something we could - // legitimately ignore, but lets allow it for now. - if (EmulateOldLDV) - BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); + // VarLoc LiveDebugValues tracks variable locations that are defined in + // blocks not in scope. This is something we could legitimately ignore, but + // lets allow it for now for the sake of coverage. + BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); // We also need to propagate variable values through any artificial blocks // that immediately follow blocks in scope. @@ -2635,7 +2657,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, // Join values from predecessors. Updates LiveInIdx, and writes output // into JoinedInLocs. bool InLocsChanged = - vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn); + vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn); SmallVector<const MachineBasicBlock *, 8> Preds; for (const auto *Pred : MBB->predecessors()) @@ -2730,6 +2752,8 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, continue; if (BlockLiveIn->Kind == DbgValue::VPHI) BlockLiveIn->Kind = DbgValue::Def; + assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() == + Var.getFragment() && "Fragment info missing during value prop"); Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn)); } } // Per-variable loop. 
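The vlocJoin simplification above drops the InScopeBlocks parameter but keeps the underlying per-variable dataflow rule: a block's live-in value for a variable is the join of its predecessors' live-out values, and if the predecessors disagree the live-in has to become a VPHI (or be discarded). A rough standalone sketch of just that join rule, with invented names and types standing in for LLVM's DbgValue machinery:

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

using Value = std::string; // stand-in for a machine value number, e.g. "x@def1"
using BlockId = int;

// Live-in value for a block = join of the predecessors' live-out values.
// Any disagreement (or a missing predecessor value) means "no single value".
std::optional<Value> joinLiveIn(const std::vector<BlockId> &Preds,
                                const std::map<BlockId, Value> &LiveOuts) {
  std::optional<Value> Joined;
  for (BlockId P : Preds) {
    auto It = LiveOuts.find(P);
    if (It == LiveOuts.end())
      return std::nullopt;   // predecessor contributes no value
    if (!Joined)
      Joined = It->second;   // first predecessor seeds the join
    else if (*Joined != It->second)
      return std::nullopt;   // predecessors disagree: needs a VPHI
  }
  return Joined;
}

int main() {
  std::map<BlockId, Value> LiveOuts = {
      {1, "x@def1"}, {2, "x@def1"}, {3, "x@def2"}};
  auto A = joinLiveIn({1, 2}, LiveOuts); // agreement
  auto B = joinLiveIn({1, 3}, LiveOuts); // disagreement
  std::cout << (A ? *A : "VPHI") << '\n' << (B ? *B : "VPHI") << '\n';
}

Running this prints "x@def1" for the agreeing predecessors and "VPHI" for the disagreeing ones, which is roughly the point where the real pass would consult pickVPHILoc before giving up on the location.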
@@ -2879,6 +2903,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MFI = &MF.getFrameInfo(); LS.initialize(MF); + const auto &STI = MF.getSubtarget(); + AdjustsStackInCalls = MFI->adjustsStack() && + STI.getFrameLowering()->stackProbeFunctionModifiesSP(); + if (AdjustsStackInCalls) + StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF); + MTracker = new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering()); VTracker = nullptr; @@ -2895,7 +2925,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, ++MaxNumBlocks; MLocTransfer.resize(MaxNumBlocks); - vlocs.resize(MaxNumBlocks); + vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr)); SavedLiveIns.resize(MaxNumBlocks); initialSetup(MF); @@ -3040,6 +3070,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, BBNumToRPO.clear(); DebugInstrNumToInstr.clear(); DebugPHINumToValue.clear(); + OverlapFragments.clear(); + SeenFragments.clear(); return Changed; } diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index d96ef6d4f6e5..789205e61cdb 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -655,6 +655,14 @@ public: const DbgValueProperties &Properties); }; +/// Types for recording sets of variable fragments that overlap. For a given +/// local variable, we record all other fragments of that variable that could +/// overlap it, to reduce search time. +using FragmentOfVar = + std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; +using OverlapMap = + DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + /// Collection of DBG_VALUEs observed when traversing a block. Records each /// variable and the value the DBG_VALUE refers to. Requires the machine value /// location dataflow algorithm to have run already, so that values can be @@ -672,9 +680,12 @@ public: MapVector<DebugVariable, DbgValue> Vars; DenseMap<DebugVariable, const DILocation *> Scopes; MachineBasicBlock *MBB = nullptr; + const OverlapMap &OverlappingFragments; + DbgValueProperties EmptyProperties; public: - VLocTracker() {} + VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr) + : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {} void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, Optional<ValueIDNum> ID) { @@ -689,6 +700,8 @@ public: if (!Result.second) Result.first->second = Rec; Scopes[Var] = MI.getDebugLoc().get(); + + considerOverlaps(Var, MI.getDebugLoc().get()); } void defVar(const MachineInstr &MI, const MachineOperand &MO) { @@ -704,16 +717,37 @@ public: if (!Result.second) Result.first->second = Rec; Scopes[Var] = MI.getDebugLoc().get(); + + considerOverlaps(Var, MI.getDebugLoc().get()); } -}; -/// Types for recording sets of variable fragments that overlap. For a given -/// local variable, we record all other fragments of that variable that could -/// overlap it, to reduce search time. -using FragmentOfVar = - std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; -using OverlapMap = - DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) { + auto Overlaps = OverlappingFragments.find( + {Var.getVariable(), Var.getFragmentOrDefault()}); + if (Overlaps == OverlappingFragments.end()) + return; + + // Otherwise: terminate any overlapped variable locations. 
+ for (auto FragmentInfo : Overlaps->second) { + // The "empty" fragment is stored as DebugVariable::DefaultFragment, so + // that it overlaps with everything, however its canonical representation + // in a DebugVariable is as "None". + Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo; + if (DebugVariable::isDefaultFragment(FragmentInfo)) + OptFragmentInfo = None; + + DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo, + Var.getInlinedAt()); + DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef); + + // Attempt insertion; overwrite if it's already mapped. + auto Result = Vars.insert(std::make_pair(Overlapped, Rec)); + if (!Result.second) + Result.first->second = Rec; + Scopes[Overlapped] = Loc; + } + } +}; // XXX XXX docs class InstrRefBasedLDV : public LDVImpl { @@ -817,6 +851,16 @@ private: OverlapMap OverlapFragments; VarToFragments SeenFragments; + /// True if we need to examine call instructions for stack clobbers. We + /// normally assume that they don't clobber SP, but stack probes on Windows + /// do. + bool AdjustsStackInCalls = false; + + /// If AdjustsStackInCalls is true, this holds the name of the target's stack + /// probe function, which is the function we expect will alter the stack + /// pointer. + StringRef StackProbeSymbolName; + /// Tests whether this instruction is a spill to a stack slot. bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); @@ -962,7 +1006,6 @@ private: /// \returns true if any live-ins change value, either from value propagation /// or PHI elimination. bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, - SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, DbgValue &LiveIn); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index dcd546f9c6db..5f976bf43c5b 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1875,34 +1875,57 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n"); - // Re-insert any debug instrs back in the position they were. Ordering - // is preserved by vector. We must re-insert in the same order to ensure that - // debug instructions don't swap, which could re-order assignments. - for (auto &P : StashedDebugInstrs) { - SlotIndex Idx = P.Idx; + // Re-insert any debug instrs back in the position they were. We must + // re-insert in the same order to ensure that debug instructions don't swap, + // which could re-order assignments. Do so in a batch -- once we find the + // insert position, insert all instructions at the same SlotIdx. They are + // guaranteed to appear in-sequence in StashedDebugInstrs because we insert + // them in order. + for (auto StashIt = StashedDebugInstrs.begin(); + StashIt != StashedDebugInstrs.end(); ++StashIt) { + SlotIndex Idx = StashIt->Idx; + MachineBasicBlock *MBB = StashIt->MBB; + MachineInstr *MI = StashIt->MI; + + auto EmitInstsHere = [this, &StashIt, MBB, Idx, + MI](MachineBasicBlock::iterator InsertPos) { + // Insert this debug instruction. + MBB->insert(InsertPos, MI); + + // Look at subsequent stashed debug instructions: if they're at the same + // index, insert those too.
+ auto NextItem = std::next(StashIt); + while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) { + assert(NextItem->MBB == MBB && "Instrs with same slot index should be" + "in the same block"); + MBB->insert(InsertPos, NextItem->MI); + StashIt = NextItem; + NextItem = std::next(StashIt); + }; + }; // Start block index: find the first non-debug instr in the block, and // insert before it. - if (Idx == Slots->getMBBStartIdx(P.MBB)) { + if (Idx == Slots->getMBBStartIdx(MBB)) { MachineBasicBlock::iterator InsertPos = - findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap); - P.MBB->insert(InsertPos, P.MI); + findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap); + EmitInstsHere(InsertPos); continue; } if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) { // Insert at the end of any debug instructions. auto PostDebug = std::next(Pos->getIterator()); - PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end()); - P.MBB->insert(PostDebug, P.MI); + PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end()); + EmitInstsHere(PostDebug); } else { // Insert position disappeared; walk forwards through slots until we // find a new one. - SlotIndex End = Slots->getMBBEndIdx(P.MBB); + SlotIndex End = Slots->getMBBEndIdx(MBB); for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) { Pos = Slots->getInstructionFromIndex(Idx); if (Pos) { - P.MBB->insert(Pos->getIterator(), P.MI); + EmitInstsHere(Pos->getIterator()); break; } } @@ -1911,8 +1934,8 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { // insert! It's not safe to discard any debug instructions; place them // in front of the first terminator, or in front of end(). if (Idx >= End) { - auto TermIt = P.MBB->getFirstTerminator(); - P.MBB->insert(TermIt, P.MI); + auto TermIt = MBB->getFirstTerminator(); + EmitInstsHere(TermIt); } } } diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index d91ff734ad8f..6380c4bfd6e6 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -108,8 +108,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true)); - for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OrigMI->getOperand(i); + for (const MachineOperand &MO : OrigMI->operands()) { if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; @@ -425,15 +424,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling // them results in incorrect code. - bool BeingSpilled = false; - for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { - if (VReg == RegsBeingSpilled[i]) { - BeingSpilled = true; - break; - } - } - - if (BeingSpilled) continue; + if (llvm::is_contained(RegsBeingSpilled, VReg)) + continue; // LI may have been separated, create new intervals. 
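
// --- Editor's sketch (not part of the patch) --------------------------------
// Standalone model of the batching in the emitDebugValues() hunk above: the
// stash is ordered by slot index, so once an insertion point has been found
// for one index, every consecutive stashed entry with that same index is
// emitted at the same point, in stash order, and debug instructions can never
// swap. The container and names below are illustrative only.
#include <cassert>
#include <cstddef>
#include <iterator>
#include <list>
#include <string>
#include <utility>
#include <vector>

using Stash = std::vector<std::pair<int, std::string>>; // (slot index, instr)

static void reinsertBatched(const Stash &Stashed, std::list<std::string> &Block,
                            std::list<std::string>::iterator InsertPos) {
  for (std::size_t I = 0; I < Stashed.size();) {
    int Idx = Stashed[I].first;
    // Emit every stashed entry carrying this slot index at the same position,
    // preserving their relative order.
    while (I < Stashed.size() && Stashed[I].first == Idx) {
      Block.insert(InsertPos, Stashed[I].second);
      ++I;
    }
    // A real implementation would now search for the insertion point of the
    // next slot index; for brevity this sketch keeps reusing the same one.
  }
}

int main() {
  Stash Stashed = {{4, "DBG_VALUE a"}, {4, "DBG_VALUE b"}, {7, "DBG_VALUE c"}};
  std::list<std::string> Block = {"INSTR x", "INSTR y"};
  reinsertBatched(Stashed, Block, std::next(Block.begin()));
  assert(Block.size() == 5); // a, b, c all land before "INSTR y", in order
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
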
LI->RenumberValues(); diff --git a/llvm/lib/CodeGen/LiveRangeUtils.h b/llvm/lib/CodeGen/LiveRangeUtils.h index dace05f1ad95..ada5c5be484a 100644 --- a/llvm/lib/CodeGen/LiveRangeUtils.h +++ b/llvm/lib/CodeGen/LiveRangeUtils.h @@ -18,7 +18,7 @@ namespace llvm { /// Helper function that distributes live range value numbers and the -/// corresponding segments of a master live range \p LR to a list of newly +/// corresponding segments of a primary live range \p LR to a list of newly /// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p /// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific /// live range in the \p SplitLRs array. diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 51ba4b7e53eb..e8744797707b 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -58,9 +58,9 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr * LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - if (Kills[i]->getParent() == MBB) - return Kills[i]; + for (MachineInstr *MI : Kills) + if (MI->getParent() == MBB) + return MI; return nullptr; } @@ -811,8 +811,8 @@ bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) { LiveVariables::VarInfo &VI = getVarInfo(Reg); SmallPtrSet<const MachineBasicBlock *, 8> Kills; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - Kills.insert(VI.Kills[i]->getParent()); + for (MachineInstr *MI : VI.Kills) + Kills.insert(MI->getParent()); // Loop over all of the successors of the basic block, checking to see if // the value is either live in the block, or if it is killed in the block. diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 2e99c8595cbd..ee2387d1e8e6 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -316,14 +316,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // than that, but the increased register pressure makes that a // tricky thing to balance. Investigate if re-materializing these // becomes an issue. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + for (const MachineOperand &MO : MI.operands()) { // Consider replacing all frame index operands that reference // an object allocated in the local block. - if (MI.getOperand(i).isFI()) { + if (MO.isFI()) { // Don't try this with values not in the local block. 
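
// --- Editor's sketch (not part of the patch) --------------------------------
// The recurring cleanup in the LiveVariables and LocalStackSlotAllocation
// hunks above (and several hunks below) replaces manual index loops with
// range-based for, sometimes skipping a leading prefix of operands the way
// llvm::drop_begin() does. A plain-C++ rendering of that pattern; the Operand
// struct and dropBegin() helper are stand-ins, not LLVM code.
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

struct Operand { int Value; };

// Minimal imitation of llvm::drop_begin(): a view of a container without its
// first N elements.
template <typename It> struct Range {
  It BeginIt, EndIt;
  It begin() const { return BeginIt; }
  It end() const { return EndIt; }
};

template <typename C>
Range<typename C::const_iterator> dropBegin(const C &Container, std::size_t N) {
  return {std::next(Container.begin(), static_cast<std::ptrdiff_t>(N)),
          Container.end()};
}

int main() {
  std::vector<Operand> Operands = {{1}, {2}, {3}, {4}};

  // Old style: manual indices, easy to get the bounds or start offset wrong.
  for (std::size_t I = 1, E = Operands.size(); I != E; ++I)
    std::cout << Operands[I].Value << ' ';
  std::cout << '\n';

  // New style: iterate the sub-range directly.
  for (const Operand &Op : dropBegin(Operands, 1))
    std::cout << Op.Value << ' ';
  std::cout << '\n';
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
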
- if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex())) + if (!MFI.isObjectPreAllocated(MO.getIndex())) break; - int Idx = MI.getOperand(i).getIndex(); + int Idx = MO.getIndex(); int64_t LocalOffset = LocalOffsets[Idx]; if (!TRI->needsFrameBaseReg(&MI, LocalOffset)) break; diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 90ecc6fc68fc..b742ad9823c9 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -314,6 +314,8 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) { } bool Changed = MIRSampleLoader->runOnFunction(MF); + if (Changed) + MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>()); if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 366d06871245..310c2721c3bd 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1170,9 +1170,10 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI) void MachineFunction::finalizeDebugInstrRefs() { auto *TII = getSubtarget().getInstrInfo(); - auto MakeDbgValue = [&](MachineInstr &MI) { + auto MakeUndefDbgValue = [&](MachineInstr &MI) { const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE); MI.setDesc(RefII); + MI.getOperand(0).setReg(0); MI.getOperand(1).ChangeToRegister(0, false); }; @@ -1187,15 +1188,15 @@ void MachineFunction::finalizeDebugInstrRefs() { Register Reg = MI.getOperand(0).getReg(); // Some vregs can be deleted as redundant in the meantime. Mark those - // as DBG_VALUE $noreg. - if (Reg == 0) { - MakeDbgValue(MI); + // as DBG_VALUE $noreg. Additionally, some normal instructions are + // quickly deleted, leaving dangling references to vregs with no def. + if (Reg == 0 || !RegInfo->hasOneDef(Reg)) { + MakeUndefDbgValue(MI); continue; } assert(Reg.isVirtual()); MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg); - assert(RegInfo->hasOneDef(Reg)); // If we've found a copy-like instruction, follow it back to the // instruction that defines the source value, see salvageCopySSA docs @@ -1327,9 +1328,9 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, assert(Old != New && "Not making a change?"); bool MadeChange = false; MachineJumpTableEntry &JTE = JumpTables[Idx]; - for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) - if (JTE.MBBs[j] == Old) { - JTE.MBBs[j] = New; + for (MachineBasicBlock *&MBB : JTE.MBBs) + if (MBB == Old) { + MBB = New; MadeChange = true; } return MadeChange; @@ -1342,8 +1343,8 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { OS << printJumpTableEntryReference(i) << ':'; - for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) - OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]); + for (const MachineBasicBlock *MBB : JumpTables[i].MBBs) + OS << ' ' << printMBBReference(*MBB); if (i != e) OS << '\n'; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 5c4f75e9ceb9..aaa80432d2f2 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1490,12 +1490,10 @@ bool MachineInstr::allDefsAreDead() const { /// instruction to this instruction. 
void MachineInstr::copyImplicitOps(MachineFunction &MF, const MachineInstr &MI) { - for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands(); - i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : + llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands())) if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask()) addOperand(MF, MO); - } } bool MachineInstr::hasComplexRegisterTies() const { diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 4d080e1a4f82..680dbe54ffaf 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -1071,7 +1071,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { // The Value and Offset may differ due to CSE. But the flags and size // should be the same. assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); - assert(MMO->getSize() == getSize() && "Size mismatch!"); + assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) || + MMO->getSize() == getSize()) && + "Size mismatch!"); if (MMO->getBaseAlign() >= getBaseAlign()) { // Update the alignment value. diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index cfbccebaff3e..7783b5e0d3cc 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction( F->addFnAttr(Attribute::OptimizeForSize); F->addFnAttr(Attribute::MinSize); - // Include target features from an arbitrary candidate for the outlined - // function. This makes sure the outlined function knows what kinds of - // instructions are going into it. This is fine, since all parent functions - // must necessarily support the instructions that are in the outlined region. Candidate &FirstCand = OF.Candidates.front(); - const Function &ParentFn = FirstCand.getMF()->getFunction(); - if (ParentFn.hasFnAttribute("target-features")) - F->addFnAttr(ParentFn.getFnAttribute("target-features")); + const TargetInstrInfo &TII = + *FirstCand.getMF()->getSubtarget().getInstrInfo(); - // Set nounwind, so we don't generate eh_frame. - if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) { - return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); - })) - F->addFnAttr(Attribute::NoUnwind); + TII.mergeOutliningCandidateAttributes(*F, OF.Candidates); BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); @@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction( MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert the new function into the module. 
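
// --- Editor's sketch (not part of the patch) --------------------------------
// Model of the relaxed size check in the refineAlignment() hunk above: a
// memory operand may carry an "unknown" size (an all-ones sentinel), and the
// assertion should only fire when both sizes are known and genuinely differ.
// MemOp and its field names are simplified stand-ins for MachineMemOperand.
#include <cassert>
#include <cstdint>

constexpr uint64_t UnknownSize = ~UINT64_C(0);

struct MemOp {
  uint64_t Size;
  unsigned Align;
};

static void refineAlignment(MemOp &Dst, const MemOp &Src) {
  // Tolerate either side being unknown; reject only a real mismatch.
  assert((Src.Size == UnknownSize || Dst.Size == UnknownSize ||
          Src.Size == Dst.Size) &&
         "Size mismatch!");
  if (Src.Align > Dst.Align)
    Dst.Align = Src.Align; // keep the stronger alignment guarantee
}

int main() {
  MemOp A{16, 4};
  MemOp Unknown{UnknownSize, 16};
  refineAlignment(A, Unknown); // fine: one side has no known size
  assert(A.Align == 16);
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
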
MF.insert(MF.begin(), &MBB); diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index e18318386def..8d6459a627fa 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1455,17 +1455,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int asap = 0; int zeroLatencyDepth = 0; SUnit *SU = &SUnits[I]; - for (SUnit::const_pred_iterator IP = SU->Preds.begin(), - EP = SU->Preds.end(); - IP != EP; ++IP) { - SUnit *pred = IP->getSUnit(); - if (IP->getLatency() == 0) + for (const SDep &P : SU->Preds) { + SUnit *pred = P.getSUnit(); + if (P.getLatency() == 0) zeroLatencyDepth = std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1); - if (ignoreDependence(*IP, true)) + if (ignoreDependence(P, true)) continue; - asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() - - getDistance(pred, SU, *IP) * MII)); + asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() - + getDistance(pred, SU, P) * MII)); } maxASAP = std::max(maxASAP, asap); ScheduleInfo[I].ASAP = asap; @@ -1521,9 +1519,8 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, SmallSetVector<SUnit *, 8> &Preds, const NodeSet *S = nullptr) { Preds.clear(); - for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); - I != E; ++I) { - for (const SDep &Pred : (*I)->Preds) { + for (const SUnit *SU : NodeOrder) { + for (const SDep &Pred : SU->Preds) { if (S && S->count(Pred.getSUnit()) == 0) continue; if (ignoreDependence(Pred, true)) @@ -1532,7 +1529,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, Preds.insert(Pred.getSUnit()); } // Back-edges are predecessors with an anti-dependence. - for (const SDep &Succ : (*I)->Succs) { + for (const SDep &Succ : SU->Succs) { if (Succ.getKind() != SDep::Anti) continue; if (S && S->count(Succ.getSUnit()) == 0) @@ -2546,8 +2543,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, unsigned Pos = 0; for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; ++I, ++Pos) { - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 30745c7a5583..54c478645dcf 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -596,8 +596,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); @@ -789,8 +788,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, // If this instruction is inside a loop and sinking this instruction can make // more registers live range shorten, it is still prifitable. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. 
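
// --- Editor's sketch (not part of the patch) --------------------------------
// The computeNodeFunctions() hunk above only restyles the loop; the recurrence
// it evaluates is unchanged:
//   ASAP(n) = max over predecessors p of ASAP(p) + latency(p,n)
//                                          - distance(p,n) * MII.
// A toy standalone evaluation, with nodes already in topological order and all
// dependence distances zero for simplicity; Edge and the node numbering are
// made up for the example.
#include <algorithm>
#include <cassert>
#include <vector>

struct Edge { int Pred, Succ, Latency, Distance; };

static std::vector<int> computeASAP(int NumNodes, const std::vector<Edge> &Edges,
                                    int MII) {
  std::vector<int> ASAP(NumNodes, 0);
  for (int N = 0; N < NumNodes; ++N)
    for (const Edge &E : Edges)
      if (E.Succ == N)
        ASAP[N] = std::max(ASAP[N],
                           ASAP[E.Pred] + E.Latency - E.Distance * MII);
  return ASAP;
}

int main() {
  // A -> B (lat 2), A -> C (lat 1), B -> D (lat 1), C -> D (lat 3).
  std::vector<Edge> Edges = {
      {0, 1, 2, 0}, {0, 2, 1, 0}, {1, 3, 1, 0}, {2, 3, 3, 0}};
  std::vector<int> ASAP = computeASAP(4, Edges, /*MII=*/2);
  assert(ASAP[1] == 2 && ASAP[2] == 1 && ASAP[3] == 4);
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
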
if (!MO.isReg()) continue; @@ -889,8 +887,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = nullptr; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; // Ignore non-register operands. Register Reg = MO.getReg(); @@ -1322,8 +1319,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isUse()) continue; Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index d6bb3e7c9e58..32078db76cf3 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1276,11 +1276,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (DstTy.getNumElements() != MI->getNumOperands() - 1) report("G_BUILD_VECTOR must have an operand for each elemement", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_BUILD_VECTOR source operand types are not homogeneous", MI); - } break; } @@ -1292,12 +1290,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isVector() || SrcEltTy.isVector()) report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous", MI); - } if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits()) report("G_BUILD_VECTOR_TRUNC source operand types are not larger than " "dest elt type", @@ -1316,11 +1312,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (MI->getNumOperands() < 3) report("G_CONCAT_VECTOR requires at least 2 source operands", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_CONCAT_VECTOR source operand types are not homogeneous", MI); - } if (DstTy.getNumElements() != SrcTy.getNumElements() * (MI->getNumOperands() - 1)) report("G_CONCAT_VECTOR num dest and source elements should match", MI); @@ -3063,9 +3057,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred); // Predecessor of landing pad live-out on 
last call. if (MFI->isEHPad()) { - for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) { - if (I->isCall()) { - PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex(); + for (const MachineInstr &MI : llvm::reverse(*Pred)) { + if (MI.isCall()) { + PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex(); break; } } diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 8b3cdfab4d42..aaa6403cc978 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -73,8 +73,7 @@ void ModuloScheduleExpander::expand() { // stage difference for each use. Keep the maximum value. for (MachineInstr *MI : Schedule.getInstructions()) { int DefStage = Schedule.getStage(MI); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &Op = MI->getOperand(i); + for (const MachineOperand &Op : MI->operands()) { if (!Op.isReg() || !Op.isDef()) continue; @@ -1006,8 +1005,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, unsigned CurStageNum, unsigned InstrStageNum, ValueMapTy *VRMap) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); + for (MachineOperand &MO : NewMI->operands()) { if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; Register reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 9a4f70a6070f..29a88480fd9f 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -527,9 +527,9 @@ static void updateLiveness(MachineFunction &MF) { const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (const CalleeSavedInfo &I : CSI) { for (MachineBasicBlock *MBB : Visited) { - MCPhysReg Reg = CSI[i].getReg(); + MCPhysReg Reg = I.getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) @@ -540,17 +540,16 @@ static void updateLiveness(MachineFunction &MF) { // each MBB between the prologue and epilogue so that it is not clobbered // before it is reloaded in the epilogue. The Visited set contains all // blocks outside of the region delimited by prologue/epilogue. - if (CSI[i].isSpilledToReg()) { + if (I.isSpilledToReg()) { for (MachineBasicBlock &MBB : MF) { if (Visited.count(&MBB)) continue; - MCPhysReg DstReg = CSI[i].getDstReg(); + MCPhysReg DstReg = I.getDstReg(); if (!MBB.isLiveIn(DstReg)) MBB.addLiveIn(DstReg); } } } - } /// Insert restore code for the callee-saved registers used in the function. @@ -902,9 +901,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. 
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() && - RegInfo->useFPForScavengingIndex(MF) && - !RegInfo->hasStackRealignment(MF)); + bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 68920e2e50df..6653145d3d2a 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1258,8 +1258,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of <def,read-undef>). - for (unsigned I = MI.getNumOperands(); I-- > 0;) { - MachineOperand &MO = MI.getOperand(I); + for (MachineOperand &MO : llvm::reverse(MI.operands())) { if (!MO.isReg() || !MO.isDef()) continue; @@ -1362,8 +1361,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free early clobbers. if (HasEarlyClobber) { - for (unsigned I = MI.getNumOperands(); I-- > 0; ) { - MachineOperand &MO = MI.getOperand(I); + for (MachineOperand &MO : llvm::reverse(MI.operands())) { if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; // subreg defs don't free the full register. We left the subreg number @@ -1440,8 +1438,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) { MachineBasicBlock::instr_iterator BundledMI = MI.getIterator(); ++BundledMI; while (BundledMI->isBundledWithPred()) { - for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) { - MachineOperand &MO = BundledMI->getOperand(I); + for (MachineOperand &MO : BundledMI->operands()) { if (!MO.isReg()) continue; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 5a93b58e0baf..50411c177007 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -199,7 +199,8 @@ class RAGreedy : public MachineFunctionPass, struct RegInfo { LiveRangeStage Stage = RS_New; - // Cascade - Eviction loop prevention. See canEvictInterference(). + // Cascade - Eviction loop prevention. See + // canEvictInterferenceBasedOnCost(). unsigned Cascade = 0; RegInfo() = default; @@ -207,13 +208,51 @@ class RAGreedy : public MachineFunctionPass, IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; + LiveRangeStage getStage(Register Reg) const { + return ExtraRegInfo[Reg].Stage; + } + LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return ExtraRegInfo[VirtReg.reg()].Stage; + return getStage(VirtReg.reg()); + } + + void setStage(Register Reg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[Reg].Stage = Stage; } void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + setStage(VirtReg.reg(), Stage); + } + + /// Return the current stage of the register, if present, otherwise initialize + /// it and return that. 
+ LiveRangeStage getOrInitStage(Register Reg) { + ExtraRegInfo.grow(Reg); + return getStage(Reg); + } + + unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; } + + void setCascade(Register Reg, unsigned Cascade) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[VirtReg.reg()].Stage = Stage; + ExtraRegInfo[Reg].Cascade = Cascade; + } + + unsigned getOrAssignNewCascade(Register Reg) { + unsigned Cascade = getCascade(Reg); + if (!Cascade) { + Cascade = NextCascade++; + setCascade(Reg, Cascade); + } + return Cascade; + } + + unsigned getCascadeOrCurrentNext(Register Reg) const { + unsigned Cascade = getCascade(Reg); + if (!Cascade) + Cascade = NextCascade; + return Cascade; } template<typename Iterator> @@ -410,8 +449,11 @@ private: void calcGapWeights(MCRegister, SmallVectorImpl<float> &); Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; - bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, - const SmallVirtRegSet &) const; + bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, + EvictionCost &, + const SmallVirtRegSet &) const; + bool canEvictHintInterference(LiveInterval &, MCRegister, + const SmallVirtRegSet &) const; bool canEvictInterferenceInRange(const LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost) const; @@ -683,15 +725,16 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { assert(Reg.isVirtual() && "Can only enqueue virtual registers"); unsigned Prio; - ExtraRegInfo.grow(Reg); - if (ExtraRegInfo[Reg].Stage == RS_New) - ExtraRegInfo[Reg].Stage = RS_Assign; - - if (ExtraRegInfo[Reg].Stage == RS_Split) { + auto Stage = getOrInitStage(Reg); + if (Stage == RS_New) { + Stage = RS_Assign; + setStage(Reg, Stage); + } + if (Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Prio = Size; - } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + } else if (Stage == RS_Memory) { // Memory operand should be considered last. // Change the priority such that Memory operand are assigned in // the reverse order that they came in. @@ -706,7 +749,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); - if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && + if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of @@ -780,10 +823,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, if (Order.isHint(Hint)) { MCRegister PhysHint = Hint.asMCReg(); LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n'); - EvictionCost MaxCost; - MaxCost.setBrokenHints(1); - if (canEvictInterference(VirtReg, PhysHint, true, MaxCost, - FixedRegisters)) { + + if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) { evictInterference(VirtReg, PhysHint, NewVRegs); return PhysHint; } @@ -864,8 +905,19 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, return false; } -/// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. 
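
// --- Editor's sketch (not part of the patch) --------------------------------
// Model of the cascade bookkeeping behind getOrAssignNewCascade() above: an
// evicting live range is lazily given a cascade number, and a victim may only
// be evicted by a strictly newer cascade, so two ranges cannot keep evicting
// each other forever. Plain vectors stand in for IndexedMap, and the
// spill-weight exception in the real assertion is ignored here.
#include <cassert>
#include <vector>

class CascadeInfo {
  std::vector<unsigned> Cascade; // 0 means "no cascade assigned yet"
  unsigned NextCascade = 1;

public:
  explicit CascadeInfo(unsigned NumRegs) : Cascade(NumRegs, 0) {}

  unsigned getOrAssignNewCascade(unsigned Reg) {
    if (!Cascade[Reg])
      Cascade[Reg] = NextCascade++;
    return Cascade[Reg];
  }

  // Returns false when the eviction would violate the "strictly newer
  // cascade" rule, i.e. the victim already carries this cascade or a newer one.
  bool evict(unsigned Evictor, unsigned Victim) {
    unsigned C = getOrAssignNewCascade(Evictor);
    if (Cascade[Victim] >= C)
      return false;
    Cascade[Victim] = C;
    return true;
  }
};

int main() {
  CascadeInfo Info(/*NumRegs=*/4);
  assert(Info.evict(0, 1));  // reg 0 gets cascade 1 and evicts reg 1
  assert(!Info.evict(1, 0)); // reg 1 now carries cascade 1: too old to evict 0
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
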
+/// canEvictHintInterference - return true if the interference for VirtReg +/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. +bool RAGreedy::canEvictHintInterference( + LiveInterval &VirtReg, MCRegister PhysReg, + const SmallVirtRegSet &FixedRegisters) const { + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); + return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost, + FixedRegisters); +} + +/// canEvictInterferenceBasedOnCost - Return true if all interferences between +/// VirtReg and PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -873,7 +925,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference( +bool RAGreedy::canEvictInterferenceBasedOnCost( LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. @@ -1054,9 +1106,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. - unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; - if (!Cascade) - Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++; + unsigned Cascade = getOrAssignNewCascade(VirtReg.reg()); LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); @@ -1082,10 +1132,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); Matrix->unassign(*Intf); - assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade || + assert((getCascade(Intf->reg()) < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg()].Cascade = Cascade; + setCascade(Intf->reg(), Cascade); ++NumEvicted; NewVRegs.push_back(Intf->reg()); } @@ -1150,8 +1200,8 @@ MCRegister RAGreedy::tryFindEvictionCandidate( continue; } - if (!canEvictInterference(VirtReg, PhysReg, false, BestCost, - FixedRegisters)) + if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, + FixedRegisters)) continue; // Best so far. @@ -1756,7 +1806,6 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, SE->finish(&IntvMap); DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); // Sort out the new intervals created by splitting. We get four kinds: @@ -1765,10 +1814,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { - LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); + const LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); // Ignore old intervals from DCE. - if (getStage(Reg) != RS_New) + if (getOrInitStage(Reg.reg()) != RS_New) continue; // Remainder interval. 
Don't try splitting again, spill if it doesn't @@ -2012,13 +2061,11 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Tell LiveDebugVariables about the new ranges. DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { - LiveInterval &LI = LIS->getInterval(LREdit.get(I)); - if (getStage(LI) == RS_New && IntvMap[I] == 0) + const LiveInterval &LI = LIS->getInterval(LREdit.get(I)); + if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0) setStage(LI, RS_Spill); } @@ -2104,8 +2151,6 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - // Assign all new registers to RS_Spill. This was the last chance. setStage(LREdit.begin(), LREdit.end(), RS_Spill); return 0; @@ -2400,7 +2445,6 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); - // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, // leave the new intervals as RS_New so they can compete. @@ -3021,7 +3065,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, LiveRangeStage Stage = getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " - << ExtraRegInfo[VirtReg.reg()].Cascade << '\n'); + << getCascade(VirtReg.reg()) << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -3311,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); ExtraRegInfo.clear(); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index c847068bca90..4c8534cf2d01 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3908,20 +3908,20 @@ void RegisterCoalescer::lateLiveIntervalUpdate() { bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; - for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { - if (!CurrList[i]) + for (MachineInstr *&MI : CurrList) { + if (!MI) continue; // Skip instruction pointers that have already been erased, for example by // dead code elimination. 
- if (ErasedInstrs.count(CurrList[i])) { - CurrList[i] = nullptr; + if (ErasedInstrs.count(MI)) { + MI = nullptr; continue; } bool Again = false; - bool Success = joinCopy(CurrList[i], Again); + bool Success = joinCopy(MI, Again); Progress |= Success; if (Success || !Again) - CurrList[i] = nullptr; + MI = nullptr; } return Progress; } diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 3f013eb6024e..0e8e8338b46d 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -406,11 +406,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // register in later operands. The lanes of other defs will now be live // after this instruction, so these should not be treated as killed by the // instruction even though they appear to be killed in this one operand. - for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &OtherMO = MI->getOperand(I); + for (const MachineOperand &OtherMO : + llvm::drop_begin(MI->operands(), OperIdx + 1)) if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) KillLaneMask &= ~getLaneMaskForMO(OtherMO); - } } // Clear undef flag, we'll re-add it later once we know which subregister diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ce400ea43f29..df5a041b87cd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4436,7 +4436,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); - if (OptimizedDiv.getNode()) { + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { // If the equivalent Div node also exists, update its users. unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), @@ -4464,6 +4464,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDLoc DL(N); if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // fold (mulhs x, 0) -> 0 // do not return N0/N1, because undef node may exist. if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || @@ -4521,6 +4524,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDLoc DL(N); if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // fold (mulhu x, 0) -> 0 // do not return N0/N1, because undef node may exist. if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || @@ -4779,6 +4785,106 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { return SDValue(); } +// Function to calculate whether the Min/Max pair of SDNodes (potentially +// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1) +// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp +// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The +// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3 +static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, unsigned &BW) { + auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + // The compare and select operand should be the same or the select operands + // should be truncated versions of the comparison. 
+ if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) + return 0; + // The constants need to be the same or a truncated version of each other. + ConstantSDNode *N1C = isConstOrConstSplat(N1); + ConstantSDNode *N3C = isConstOrConstSplat(N3); + if (!N1C || !N3C) + return 0; + const APInt &C1 = N1C->getAPIntValue(); + const APInt &C2 = N3C->getAPIntValue(); + if (C1.getBitWidth() < C2.getBitWidth() || + C1 != C2.sextOrSelf(C1.getBitWidth())) + return 0; + return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); + }; + + // Check the initial value is a SMIN/SMAX equivalent. + unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC); + if (!Opcode0) + return SDValue(); + + SDValue N00, N01, N02, N03; + ISD::CondCode N0CC; + switch (N0.getOpcode()) { + case ISD::SMIN: + case ISD::SMAX: + N00 = N02 = N0.getOperand(0); + N01 = N03 = N0.getOperand(1); + N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT; + break; + case ISD::SELECT_CC: + N00 = N0.getOperand(0); + N01 = N0.getOperand(1); + N02 = N0.getOperand(2); + N03 = N0.getOperand(3); + N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get(); + break; + case ISD::SELECT: + case ISD::VSELECT: + if (N0.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + N00 = N0.getOperand(0).getOperand(0); + N01 = N0.getOperand(0).getOperand(1); + N02 = N0.getOperand(1); + N03 = N0.getOperand(2); + N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get(); + break; + default: + return SDValue(); + } + + unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC); + if (!Opcode1 || Opcode0 == Opcode1) + return SDValue(); + + ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01); + ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1); + if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0)) + return SDValue(); + + const APInt &MinC = MinCOp->getAPIntValue(); + const APInt &MaxC = MaxCOp->getAPIntValue(); + APInt MinCPlus1 = MinC + 1; + if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2()) + return SDValue(); + BW = MinCPlus1.exactLogBase2() + 1; + return N02; +} + +static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + SelectionDAG &DAG) { + unsigned BW; + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW); + if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) + return SDValue(); + EVT FPVT = Fp.getOperand(0).getValueType(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); + if (FPVT.isVector()) + NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, + FPVT.getVectorElementCount()); + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat( + ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT)) + return SDValue(); + SDLoc DL(Fp); + SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0), + DAG.getValueType(NewVT.getScalarType())); + return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); +} + SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4817,6 +4923,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.getNode(AltOpcode, DL, VT, N0, N1); } + if (Opcode == ISD::SMIN || Opcode == ISD::SMAX) + if (SDValue S = PerformMinMaxFpToSatCombine( + N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG)) + return S; + // Simplify the operands using demanded-bits information. 
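
// --- Editor's sketch (not part of the patch) --------------------------------
// The arithmetic at the heart of isSaturatingMinMax() above, on plain
// integers: a clamp smax(smin(x, MinC), MaxC) is a signed saturate to BW bits
// exactly when MinC + 1 is a power of two and equals -MaxC, in which case
// BW = log2(MinC + 1) + 1. Helper name and types are for illustration only.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<unsigned> signedSatBits(int64_t SMinConst,
                                             int64_t SMaxConst) {
  uint64_t MinCPlus1 = static_cast<uint64_t>(SMinConst) + 1;
  bool IsPow2 = MinCPlus1 != 0 && (MinCPlus1 & (MinCPlus1 - 1)) == 0;
  if (!IsPow2 || static_cast<uint64_t>(-SMaxConst) != MinCPlus1)
    return std::nullopt;
  unsigned Log2 = 0;
  while ((MinCPlus1 >> Log2) != 1)
    ++Log2;
  return Log2 + 1;
}

int main() {
  // smax(smin(x, 127), -128) clamps x to the signed 8-bit range.
  assert(signedSatBits(127, -128) == 8u);
  // smax(smin(x, 100), -128) does not saturate to any power-of-two width.
  assert(!signedSatBits(100, -128));
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
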
if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -9940,9 +10051,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, compressing, or truncating stores? - if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && - MST->isUnindexed() && !MST->isCompressingStore() && - !MST->isTruncatingStore()) + if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() && + !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getMemOperand()); @@ -9997,9 +10107,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, expanding, or extending loads? - if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && - MLD->isUnindexed() && !MLD->isExpandingLoad() && - MLD->getExtensionType() == ISD::NON_EXTLOAD) { + if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() && + !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) { SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getMemOperand()); return CombineTo(N, NewLd, NewLd.getValue(1)); @@ -10138,6 +10247,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return FMinMax; } + if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) + return S; + // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. // TODO: This should be extended to handle any constant. @@ -15007,7 +15119,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -23034,6 +23146,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, DAG.getSExtOrTrunc(CC == ISD::SETLT ? 
N3 : N2, DL, VT)); } + if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) + return S; + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c1bb65409282..331e0325aea3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -765,7 +765,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, assert(!SD->isVariadic()); SDDbgOperand DbgOperand = SD->getLocationOps()[0]; MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = (DIExpression*)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); @@ -775,6 +775,13 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, DbgOperand.getKind() == SDDbgOperand::CONST) return EmitDbgValueFromSingleOp(SD, VRBaseMap); + // Immediately fold any indirectness from the LLVM-IR intrinsic into the + // expression: + if (SD->isIndirect()) { + std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; + Expr = DIExpression::append(Expr, Elts); + } + // It may not be immediately possible to identify the MachineInstr that // defines a VReg, it can depend for example on the order blocks are // emitted in. When this happens, or when further analysis is needed later, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index eb9d2286aeb4..08598eeded7a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3553,9 +3553,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Node. Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - if (Tmp2.getOpcode() == ISD::SETCC) { - Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, - Tmp1, Tmp2.getOperand(2), + if (Tmp2.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, + Tmp2.getOperand(0).getValueType())) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2), Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 1f73c9eea104..98312f91d8c0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -28,7 +28,7 @@ using namespace llvm; static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); -/// Do extensive, expensive, sanity checking. +/// Do extensive, expensive, basic correctness checking. void DAGTypeLegalizer::PerformExpensiveChecks() { // If a node is not processed, then none of its values should be mapped by any // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. @@ -534,7 +534,8 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can // in theory momentarily not be the case while ReplaceValueWith is doing - // its stuff. Mark the original node NewNode to help sanity checking. + // its stuff. Mark the original node NewNode to help basic correctness + // checking. N->setNodeId(NewNode); if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) // It morphed into a previously analyzed node - nothing more to do. 
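
// --- Editor's sketch (not part of the patch) --------------------------------
// Model of the EmitDbgInstrRef() change above: rather than carrying a separate
// "indirect" flag alongside the debug value, the extra load is folded into the
// expression itself by appending DW_OP_deref (DWARF opcode 0x06). Expressions
// are plain opcode vectors here instead of DIExpression nodes.
#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint64_t DwOpDeref = 0x06;      // DW_OP_deref
constexpr uint64_t DwOpPlusUconst = 0x23; // DW_OP_plus_uconst

static std::vector<uint64_t> foldIndirection(std::vector<uint64_t> Expr,
                                             bool IsIndirect) {
  if (IsIndirect)
    Expr.push_back(DwOpDeref); // one extra load through the computed location
  return Expr;
}

int main() {
  std::vector<uint64_t> Expr = {DwOpPlusUconst, 8}; // "location + 8"
  std::vector<uint64_t> Folded = foldIndirection(Expr, /*IsIndirect=*/true);
  assert(Folded.size() == 3 && Folded.back() == DwOpDeref);
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
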
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 539c9cb9c256..7ec2638b1e71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment, - MLD->getAAInfo(), MLD->getRanges()); + MLD->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(), + MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Generate hi masked load. Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) @@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, LoMemVT.getStoreSize().getFixedSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(), - MLD->getRanges()); + MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, - N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MPI = N->getPointerInfo().getWithOffset( LoMemVT.getStoreSize().getFixedSize()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), - N->getRanges()); + MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 55fe26eb64cd..2695ed36991c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -268,8 +268,8 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { // Now see if there are no other dependencies // to instructions already in 
the packet. - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - for (const SDep &Succ : Packet[i]->Succs) { + for (const SUnit *S : Packet) + for (const SDep &Succ : S->Succs) { // Since we do not add pseudos to packets, might as well // ignore order deps. if (Succ.isCtrl()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 95f7e43b151d..84e6d2a16422 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -706,8 +706,8 @@ void ScheduleDAGSDNodes::dump() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) + for (const SUnit *SU : Sequence) { + if (SU) dumpNode(*SU); else dbgs() << "**** NOOP ****\n"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 008665d50233..c282e03387dd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -406,8 +406,8 @@ bool ISD::isVPOpcode(unsigned Opcode) { switch (Opcode) { default: return false; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \ + case ISD::VPSD: \ return true; #include "llvm/IR/VPIntrinsics.def" } @@ -416,23 +416,25 @@ bool ISD::isVPOpcode(unsigned Opcode) { bool ISD::isVPBinaryOp(unsigned Opcode) { switch (Opcode) { default: - return false; -#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ - case ISD::SDOPC: \ - return true; + break; +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD: +#define VP_PROPERTY_BINARYOP return true; +#define END_REGISTER_VP_SDNODE(VPSD) break; #include "llvm/IR/VPIntrinsics.def" } + return false; } bool ISD::isVPReduction(unsigned Opcode) { switch (Opcode) { default: - return false; -#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ - case ISD::SDOPC: \ - return true; + break; +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD: +#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true; +#define END_REGISTER_VP_SDNODE(VPSD) break; #include "llvm/IR/VPIntrinsics.def" } + return false; } /// The operand position of the vector mask. @@ -440,8 +442,8 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { default: return None; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ + case ISD::VPSD: \ return MASKPOS; #include "llvm/IR/VPIntrinsics.def" } @@ -452,8 +454,8 @@ Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { switch (Opcode) { default: return None; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ + case ISD::VPSD: \ return EVLPOS; #include "llvm/IR/VPIntrinsics.def" } @@ -974,7 +976,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) { } #ifndef NDEBUG -/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +/// VerifySDNode - Check the given SDNode. Aborts if it is invalid. static void VerifySDNode(SDNode *N) { switch (N->getOpcode()) { default: @@ -4540,10 +4542,25 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { } // FIXME: unify with llvm::haveNoCommonBitsSet. 
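
// --- Editor's sketch (not part of the patch) --------------------------------
// The isVPBinaryOp()/isVPReduction() hunks above switch to a pattern where
// every entry of VPIntrinsics.def expands a BEGIN/PROPERTY/END macro triple
// and the including code decides what each macro means. A tiny self-contained
// imitation: the opcodes are made up and the entry list is inlined in a macro
// instead of living in a separate .def file.
#include <cassert>

// Stand-in for the .def file: each entry is BEGIN ... optional properties ...
// END, expanded with whatever the including code has defined the macros to be.
#define VP_OPCODE_LIST                                                         \
  BEGIN_VP(VP_ADD) VP_PROPERTY_BINARYOP END_VP(VP_ADD)                         \
  BEGIN_VP(VP_REDUCE_ADD) END_VP(VP_REDUCE_ADD)                                \
  BEGIN_VP(VP_LOAD) END_VP(VP_LOAD)

enum Opcode : unsigned { VP_ADD, VP_REDUCE_ADD, VP_LOAD, NOT_VP };

static bool isVPBinaryOp(unsigned Opc) {
  switch (Opc) {
  default:
    break;
  // Every entry contributes a case label; only entries carrying the binary-op
  // property also contribute "return true", the rest simply break.
#define BEGIN_VP(OP) case OP:
#define VP_PROPERTY_BINARYOP return true;
#define END_VP(OP) break;
    VP_OPCODE_LIST
#undef BEGIN_VP
#undef VP_PROPERTY_BINARYOP
#undef END_VP
  }
  return false;
}

int main() {
  assert(isVPBinaryOp(VP_ADD));
  assert(!isVPBinaryOp(VP_REDUCE_ADD) && !isVPBinaryOp(NOT_VP));
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
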
-// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); + // Match masked merge pattern (X & ~M) op (Y & M) + if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) { + auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) { + if (isBitwiseNot(NotM, true)) { + SDValue NotOperand = NotM->getOperand(0); + return NotOperand == And->getOperand(0) || + NotOperand == And->getOperand(1); + } + return false; + }; + if (MatchNoCommonBitsPattern(A->getOperand(0), B) || + MatchNoCommonBitsPattern(A->getOperand(1), B) || + MatchNoCommonBitsPattern(B->getOperand(0), A) || + MatchNoCommonBitsPattern(B->getOperand(1), A)) + return true; + } return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), computeKnownBits(B)); } @@ -5070,7 +5087,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); break; case ISD::BITCAST: - // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5d911c165293..7726a0007e44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata()); + MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - MemoryLocation ML; - if (VT.isScalableVector()) - ML = MemoryLocation::getAfter(PtrOperand); - else - ML = MemoryLocation(PtrOperand, LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. 
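
// --- Editor's sketch (not part of the patch) --------------------------------
// Why the haveNoCommonBitsSet() hunk above can accept the masked-merge shape
// (X & ~M) op (Y & M) structurally: each bit position of M routes that bit to
// at most one of the two operands, so the operands can never share a set bit.
// A brute-force confirmation over all 8-bit values.
#include <cassert>
#include <cstdint>

static bool noCommonBits(uint8_t A, uint8_t B) { return (A & B) == 0; }

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned M = 0; M < 256; ++M) {
        uint8_t Lhs = static_cast<uint8_t>(X & ~M);
        uint8_t Rhs = static_cast<uint8_t>(Y & M);
        assert(noCommonBits(Lhs, Rhs));
      }
  return 0;
}
// --- End of editor's sketch --------------------------------------------------
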
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, @@ -5807,8 +5797,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::vscale: { match(&I, m_VScale(DAG.getDataLayout())); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, - DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1))); + setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1))); return; } case Intrinsic::vastart: visitVAStart(I); return; @@ -6942,10 +6931,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); - SDValue N = - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + N = DAG.getZExtOrTrunc(N, sdl, MVT::i32); setValue(&I, N); return; } @@ -6957,7 +6945,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Triple.getArch() != Triple::x86_64) return; - SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; // We want to say that we always want the arguments in registers. @@ -6974,7 +6961,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, - DL, NodeTys, Ops); + sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); @@ -6988,7 +6975,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Triple.getArch() != Triple::x86_64) return; - SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; // We want to say that we always want the arguments in registers. @@ -7009,7 +6995,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. 
MachineSDNode *MN = DAG.getMachineNode( - TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); @@ -7047,7 +7033,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (!Base) report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); - Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0)); struct BranchFunnelTarget { int64_t Offset; @@ -7068,8 +7054,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); Targets.push_back({Offset, DAG.getTargetGlobalAddress( - GA->getGlobal(), getCurSDLoc(), - Val.getValueType(), GA->getOffset())}); + GA->getGlobal(), sdl, Val.getValueType(), + GA->getOffset())}); } llvm::sort(Targets, [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { @@ -7077,13 +7063,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, }); for (auto &T : Targets) { - Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); + Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32)); Ops.push_back(T.Target); } Ops.push_back(DAG.getRoot()); // Chain - SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, - getCurSDLoc(), MVT::Other, Ops), + SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl, + MVT::Other, Ops), 0); DAG.setRoot(N); setValue(&I, N); @@ -7102,7 +7088,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; SDValue Val = TSI.EmitTargetCodeForSetTag( - DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), + DAG, sdl, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), ZeroMemory); DAG.setRoot(Val); @@ -7114,46 +7100,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Const = getValue(I.getOperand(1)); EVT PtrVT = Ptr.getValueType(); - setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr, - DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT))); + setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, + DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } case Intrinsic::get_active_lane_mask: { - auto DL = getCurSDLoc(); + EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); - SDValue TripCount = getValue(I.getOperand(1)); - Type *ElementTy = I.getOperand(0)->getType(); - EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned VecWidth = VT.getVectorNumElements(); + EVT ElementVT = Index.getValueType(); - SmallVector<SDValue, 16> OpsTripCount; - SmallVector<SDValue, 16> OpsIndex; - SmallVector<SDValue, 16> OpsStepConstants; - for (unsigned i = 0; i < VecWidth; i++) { - OpsTripCount.push_back(TripCount); - OpsIndex.push_back(Index); - OpsStepConstants.push_back( - DAG.getConstant(i, DL, EVT::getEVT(ElementTy))); + if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) { + visitTargetIntrinsic(I, Intrinsic); + return; } - EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth); + SDValue TripCount = getValue(I.getOperand(1)); + auto VecTy = CCVT.changeVectorElementType(ElementVT); - auto VecTy = 
EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth)); - SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); - SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); + SDValue VectorIndex, VectorTripCount; + if (VecTy.isScalableVector()) { + VectorIndex = DAG.getSplatVector(VecTy, sdl, Index); + VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount); + } else { + VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index); + VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount); + } + SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( - ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount); - SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), + ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); + SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0), VectorTripCount, ISD::CondCode::SETULT); - setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, - DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), + setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT, + DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT), SetCC)); return; } case Intrinsic::experimental_vector_insert: { - auto DL = getCurSDLoc(); - SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); @@ -7163,16 +7145,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), DL); + cast<ConstantSDNode>(Index)->getZExtValue(), sdl); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, + setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec, Index)); return; } case Intrinsic::experimental_vector_extract: { - auto DL = getCurSDLoc(); - SDValue Vec = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -7182,9 +7162,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), DL); + cast<ConstantSDNode>(Index)->getZExtValue(), sdl); - setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); + setValue(&I, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index)); return; } case Intrinsic::experimental_vector_reverse: @@ -7314,9 +7295,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { Optional<unsigned> ResOPC; switch (VPIntrin.getIntrinsicID()) { -#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN: -#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID; -#define END_REGISTER_VP_INTRINSIC(...) break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) 
ResOPC = ISD::VPSD; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e4a69adff05b..737695b5eabe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( if (DemandedBits == 0 || DemandedElts == 0) return DAG.getUNDEF(Op.getValueType()); + bool IsLE = DAG.getDataLayout().isLittleEndian(); unsigned NumElts = DemandedElts.getBitWidth(); unsigned BitWidth = DemandedBits.getBitWidth(); KnownBits LHSKnown, RHSKnown; @@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedElts, DAG, Depth + 1)) return DAG.getBitcast(DstVT, V); - // TODO - bigendian once we have test coverage. - if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( } // TODO - bigendian once we have test coverage. - if ((NumSrcEltBits % NumDstEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); - if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && - DAG.getDataLayout().isLittleEndian() && + if (IsLE && DemandedElts == 1 && + DstVT.getSizeInBits() == SrcVT.getSizeInBits() && DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { return DAG.getBitcast(DstVT, Src); } @@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getValueType().isScalableVector()) return false; + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); assert((!Op.getValueType().isVector() || NumElts == Op.getValueType().getVectorNumElements()) && @@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::ROTR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + bool IsROTL = (Op.getOpcode() == ISD::ROTL); // If we're rotating an 0/-1 value, then it stays an 0/-1 value. 
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) return TLO.CombineTo(Op, Op0); + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + unsigned RevAmt = BitWidth - Amt; + + // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt)) + // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt) + APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + + // rot*(x, 0) --> x + if (Amt == 0) + return TLO.CombineTo(Op, Op0); + + // See if we don't demand either half of the rotated bits. + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) && + DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1)); + } + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) && + DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + } + } + // For pow-2 bitwidths we only demand the bottom modulo amt bits. if (isPowerOf2_32(BitWidth)) { APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); @@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only need the bottom element then we can just bitcast. // TODO: Handle ANY_EXTEND? - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); @@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. - // TODO - bigendian once we have test coverage. 
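// The ISD::ROTL/ROTR hunk above relies on the identity rotl(x, Amt) ==
// (x << Amt) | (x >> (BW - Amt)): when none of the low Amt result bits are
// demanded, the wrapped-in high bits are irrelevant and the rotate can be
// narrowed to a plain shift. A 32-bit check of that equivalence under a
// demanded-bits mask (illustrative only, not the SimplifyDemandedBits code):
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned Amt) {
  Amt &= 31;
  return Amt == 0 ? X : (X << Amt) | (X >> (32 - Amt));
}

int main() {
  uint32_t X = 0xdeadbeef;
  unsigned Amt = 8;
  uint32_t DemandedBits = 0xffffff00; // the low Amt result bits are not demanded
  assert((rotl32(X, Amt) & DemandedBits) == ((X << Amt) & DemandedBits));
  return 0;
}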
- if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, KnownSrcBits, TLO, Depth + 1)) return true; - } else if ((NumSrcEltBits % BitWidth) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) { + // TODO - bigendian once we have test coverage. unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); // Helper for demanding the specified elements and all the bits of both binary // operands. @@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Try calling SimplifyDemandedBits, converting demanded elts to the bits // of the large element. // TODO - bigendian once we have test coverage. - if (TLO.DAG.getDataLayout().isLittleEndian()) { + if (IsLE) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) @@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.zextOrTrunc(NumElts); KnownUndef = SrcUndef.zextOrTrunc(NumElts); - if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && + if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && Op.getValueSizeInBits() == Src.getValueSizeInBits() && - DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { + DemandedSrcElts == 1) { // aext - if we just need the bottom element then we can bitcast. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); } @@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( // zext - if we just need the bottom element then we can mask: // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. - if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && - Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND && + Op->isOnlyUserOf(Src.getNode()) && Op.getValueSizeInBits() == Src.getValueSizeInBits()) { SDLoc DL(Op); EVT SrcVT = Src.getValueType(); @@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. 
+ case ISD::ADD: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) { + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1, /*AssumeSingleUse*/ true)) + return true; + } + LLVM_FALLTHROUGH; + } case ISD::OR: case ISD::XOR: - case ISD::ADD: case ISD::SUB: case ISD::FADD: case ISD::FSUB: @@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) @@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 9aea5a7a8853..f49ba5ccd447 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -159,8 +159,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isFI()) continue; int FI = MO.getIndex(); @@ -394,8 +393,7 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF) { // Update the operands. - for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (!MO.isFI()) continue; int OldFI = MO.getIndex(); diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 943bd18c6c8b..54fc6ee45d00 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); -static cl::opt<unsigned> TailDupJmpTableLoopSize( - "tail-dup-jmptable-loop-size", - cl::desc("Maximum loop latches to consider tail duplication that are " - "successors of loop header."), - cl::init(128), cl::Hidden); - static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; - // When doing tail-duplication with jumptable loops like: - // 1 -> 2 <-> 3 | - // \ <-> 4 | - // \ <-> 5 | - // \ <-> ... | - // \---> rest | - // quadratic number of edges and much more loops are added to CFG. This - // may cause compile time regression when jumptable is quiet large. - // So set the limit on jumptable cases. 
- auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { - const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(), - TailBB.pred_end()); - // Check the basic block has large number of successors, all of them only - // have one successor which is the basic block itself. - return llvm::count_if( - TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { - return Preds.count(SuccBB) && SuccBB->succ_size() == 1; - }) > TailDupJmpTableLoopSize; - }; - - if (isLargeJumpTableLoop(TailBB)) - return false; - // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index b0594ec086b2..fbf190a52585 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -136,6 +136,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const { + if (!hasFP(MF)) + return false; + + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + return RegInfo->useFPForScavengingIndex(MF) && + !RegInfo->hasStackRealignment(MF); +} + bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { if (!F.hasLocalLinkage() || F.hasAddressTaken() || !F.hasFnAttribute(Attribute::NoRecurse)) diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index e74b3195a130..5119dac36713 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -957,8 +957,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // If any of the registers accessed are non-constant, conservatively assume // the instruction is not rematerializable. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) @@ -1401,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment( } TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} + +void TargetInstrInfo::mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const { + // Include target features from an arbitrary candidate for the outlined + // function. This makes sure the outlined function knows what kinds of + // instructions are going into it. This is fine, since all parent functions + // must necessarily support the instructions that are in the outlined region. + outliner::Candidate &FirstCand = Candidates.front(); + const Function &ParentFn = FirstCand.getMF()->getFunction(); + if (ParentFn.hasFnAttribute("target-features")) + F.addFnAttr(ParentFn.getFnAttribute("target-features")); + + // Set nounwind, so we don't generate eh_frame. 
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) { + return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); + })) + F.addFnAttr(Attribute::NoUnwind); +} diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 1d3bb286c882..d1c2cdeb133b 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1082,7 +1082,7 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; - // Basic sanity checks. + // Basic correctness checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) @@ -2135,7 +2135,7 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; - // Basic sanity checks. + // Basic correctness checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 46cec5407565..dfd962be2882 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -373,19 +373,25 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) { return false; } -/// Given a register, if has a single in-basic block use, return the use -/// instruction if it's a copy or a two-address use. +/// Given a register, if all its uses are in the same basic block, return the +/// last use instruction if it's a copy or a two-address use. static MachineInstr * findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - bool &IsCopy, Register &DstReg, bool &IsDstPhys) { - if (!MRI->hasOneNonDBGUse(Reg)) - // None or more than one use. 
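// The new TargetInstrInfo::mergeOutliningCandidateAttributes() above copies
// "target-features" from one candidate's parent function and marks the
// outlined function nounwind only when every candidate's parent is nounwind.
// A small sketch of that "all candidates must agree" rule over plain flags
// (hypothetical Candidate struct, not the real outliner API):
#include <algorithm>
#include <cassert>
#include <vector>

struct Candidate {
  bool ParentIsNoUnwind;
};

static bool outlinedFunctionGetsNoUnwind(const std::vector<Candidate> &Cands) {
  return std::all_of(Cands.begin(), Cands.end(),
                     [](const Candidate &C) { return C.ParentIsNoUnwind; });
}

int main() {
  assert(outlinedFunctionGetsNoUnwind({{true}, {true}}));
  assert(!outlinedFunctionGetsNoUnwind({{true}, {false}}));
  return 0;
}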
- return nullptr; - MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg); - MachineInstr &UseMI = *UseOp.getParent(); - if (UseMI.getParent() != MBB) + bool &IsCopy, Register &DstReg, bool &IsDstPhys, + LiveIntervals *LIS) { + MachineOperand *UseOp = nullptr; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *MI = MO.getParent(); + if (MI->getParent() != MBB) + return nullptr; + if (isPlainlyKilled(MI, Reg, LIS)) + UseOp = &MO; + } + if (!UseOp) return nullptr; + MachineInstr &UseMI = *UseOp->getParent(); + Register SrcReg; bool IsSrcPhys; if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { @@ -399,7 +405,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, } if (UseMI.isCommutable()) { unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex; - unsigned Src2 = UseMI.getOperandNo(&UseOp); + unsigned Src2 = UseMI.getOperandNo(UseOp); if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) { MachineOperand &MO = UseMI.getOperand(Src1); if (MO.isReg() && MO.isUse() && @@ -492,8 +498,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { return; } - for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) { removeMapRegEntry(MO, SrcRegMap, TRI); continue; @@ -685,7 +690,6 @@ bool TwoAddressInstructionPass::convertInstTo3Addr( // If the old instruction is debug value tracked, an update is required. if (auto OldInstrNum = mi->peekDebugInstrNum()) { - // Sanity check. assert(mi->getNumExplicitDefs() == 1); assert(NewMI->getNumExplicitDefs() == 1); @@ -724,7 +728,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) { Register NewReg; Register Reg = DstReg; while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, - NewReg, IsDstPhys)) { + NewReg, IsDstPhys, LIS)) { if (IsCopy && !Processed.insert(UseMI).second) break; @@ -1336,8 +1340,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg().isVirtual()) { if (MO.isUse()) { if (MO.isKill()) { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index fb0798f204e1..7673a721c4ea 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include "llvm/Support/DJB.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" @@ -317,12 +318,33 @@ bool DWARFVerifier::handleDebugAbbrev() { return NumErrors == 0; } -unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, - DWARFSectionKind SectionKind) { +unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) { + unsigned NumDebugInfoErrors = 0; + ReferenceMap CrossUnitReferences; + + for (const auto &Unit : Units) { + ReferenceMap UnitLocalReferences; + NumDebugInfoErrors += + verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences); + NumDebugInfoErrors += verifyDebugInfoReferences( + UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); }); + } + + NumDebugInfoErrors += verifyDebugInfoReferences( + CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * { + if (DWARFUnit *U = Units.getUnitForOffset(Offset)) + return U; + return nullptr; + }); + + return NumDebugInfoErrors; +} + +unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) { const DWARFObject &DObj = DCtx.getDWARFObj(); DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0); unsigned NumDebugInfoErrors = 0; - uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0; + uint64_t Offset = 0, UnitIdx = 0; uint8_t UnitType = 0; bool isUnitDWARF64 = false; bool isHeaderChainValid = true; @@ -334,48 +356,11 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, /// lies between to valid DIEs. ReferenceMap CrossUnitReferences; while (hasDIE) { - OffsetStart = Offset; if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType, isUnitDWARF64)) { isHeaderChainValid = false; if (isUnitDWARF64) break; - } else { - DWARFUnitHeader Header; - Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind); - ReferenceMap UnitLocalReferences; - DWARFUnit *Unit; - switch (UnitType) { - case dwarf::DW_UT_type: - case dwarf::DW_UT_split_type: { - Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), - &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), &DObj.getAddrSection(), - DObj.getLineSection(), DCtx.isLittleEndian(), false, - TypeUnitVector)); - break; - } - case dwarf::DW_UT_skeleton: - case dwarf::DW_UT_split_compile: - case dwarf::DW_UT_compile: - case dwarf::DW_UT_partial: - // UnitType = 0 means that we are verifying a compile unit in DWARF v4. 
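// The DWARFVerifier changes above split reference validation in two: each
// unit's local references are checked as the unit is verified, and the
// remaining cross-unit references are resolved afterwards against the whole
// unit vector. A container-level sketch of that second pass (integer offsets
// standing in for DIE references, a plain struct standing in for DWARFUnit;
// illustrative only):
#include <cassert>
#include <functional>
#include <set>
#include <vector>

struct Unit {
  unsigned Begin, End; // offset range owned by this unit
};

static unsigned verifyRefs(const std::set<unsigned> &Refs,
                           const std::function<const Unit *(unsigned)> &GetUnit) {
  unsigned Errors = 0;
  for (unsigned Off : Refs)
    if (!GetUnit(Off))
      ++Errors; // reference does not land inside any known unit
  return Errors;
}

int main() {
  std::vector<Unit> Units = {{0, 100}, {100, 200}};
  auto ForOffset = [&](unsigned Off) -> const Unit * {
    for (const Unit &U : Units)
      if (Off >= U.Begin && Off < U.End)
        return &U;
    return nullptr;
  };
  std::set<unsigned> CrossUnitRefs = {50, 150, 400}; // 400 is out of range
  assert(verifyRefs(CrossUnitRefs, ForOffset) == 1);
  return 0;
}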
- case 0: { - Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>( - DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), - &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), &DObj.getAddrSection(), - DObj.getLineSection(), DCtx.isLittleEndian(), false, - CompileUnitVector)); - break; - } - default: { llvm_unreachable("Invalid UnitType."); } - } - NumDebugInfoErrors += - verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences); - NumDebugInfoErrors += verifyDebugInfoReferences( - UnitLocalReferences, [&](uint64_t Offset) { return Unit; }); } hasDIE = DebugInfoData.isValidOffset(Offset); ++UnitIdx; @@ -386,14 +371,6 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, } if (!isHeaderChainValid) ++NumDebugInfoErrors; - NumDebugInfoErrors += verifyDebugInfoReferences( - CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * { - if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset)) - return U; - if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset)) - return U; - return nullptr; - }); return NumDebugInfoErrors; } @@ -403,13 +380,16 @@ bool DWARFVerifier::handleDebugInfo() { OS << "Verifying .debug_info Unit Header Chain...\n"; DObj.forEachInfoSections([&](const DWARFSection &S) { - NumErrors += verifyUnitSection(S, DW_SECT_INFO); + NumErrors += verifyUnitSection(S); }); OS << "Verifying .debug_types Unit Header Chain...\n"; DObj.forEachTypesSections([&](const DWARFSection &S) { - NumErrors += verifyUnitSection(S, DW_SECT_EXT_TYPES); + NumErrors += verifyUnitSection(S); }); + + OS << "Verifying non-dwo Units...\n"; + NumErrors += verifyUnits(DCtx.getNormalUnitsVector()); return NumErrors == 0; } diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index d2f1bf4323ee..f380aa90035e 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -14,12 +14,250 @@ //===----------------------------------------------------------------------===// #include "llvm/Demangle/Demangle.h" +#include "llvm/Demangle/StringView.h" #include "llvm/Demangle/Utility.h" +#include <cctype> #include <cstring> +#include <limits> using namespace llvm; using llvm::itanium_demangle::OutputBuffer; +using llvm::itanium_demangle::StringView; + +namespace { + +/// Demangle information structure. +struct Demangler { + /// Initialize the information structure we use to pass around information. + /// + /// \param Mangled String to demangle. + Demangler(const char *Mangled); + + /// Extract and demangle the mangled symbol and append it to the output + /// string. + /// + /// \param Demangled Output buffer to write the demangled name. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#name_mangling . + /// \see https://dlang.org/spec/abi.html#MangledName . + const char *parseMangle(OutputBuffer *Demangled); + +private: + /// Extract and demangle a given mangled symbol and append it to the output + /// string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#name_mangling . + /// \see https://dlang.org/spec/abi.html#MangledName . + const char *parseMangle(OutputBuffer *Demangled, const char *Mangled); + + /// Extract the number from a given string. + /// + /// \param Mangled string to extract the number. 
+ /// \param Ret assigned result value. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \note A result larger than UINT_MAX is considered a failure. + /// + /// \see https://dlang.org/spec/abi.html#Number . + const char *decodeNumber(const char *Mangled, unsigned long *Ret); + + /// Check whether it is the beginning of a symbol name. + /// + /// \param Mangled string to extract the symbol name. + /// + /// \return true on success, false otherwise. + /// + /// \see https://dlang.org/spec/abi.html#SymbolName . + bool isSymbolName(const char *Mangled); + + /// Extract and demangle an identifier from a given mangled symbol append it + /// to the output string. + /// + /// \param Demangled Output buffer to write the demangled name. + /// \param Mangled Mangled symbol to be demangled. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#SymbolName . + const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the plain identifier from a given mangled symbol and + /// prepend/append it to the output string, with a special treatment for some + /// magic compiler generated symbols. + /// + /// \param Demangled Output buffer to write the demangled name. + /// \param Mangled Mangled symbol to be demangled. + /// \param Len Length of the mangled symbol name. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#LName . + const char *parseLName(OutputBuffer *Demangled, const char *Mangled, + unsigned long Len); + + /// Extract and demangle the qualified symbol from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled Output buffer to write the demangled name. + /// \param Mangled Mangled symbol to be demangled. + /// + /// \return The remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#QualifiedName . + const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); + + /// The string we are demangling. + const char *Str; +}; + +} // namespace + +const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { + // Return nullptr if trying to extract something that isn't a digit. + if (Mangled == nullptr || !std::isdigit(*Mangled)) + return nullptr; + + unsigned long Val = 0; + + do { + unsigned long Digit = Mangled[0] - '0'; + + // Check for overflow. + if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) + return nullptr; + + Val = Val * 10 + Digit; + ++Mangled; + } while (std::isdigit(*Mangled)); + + if (*Mangled == '\0') + return nullptr; + + *Ret = Val; + return Mangled; +} + +bool Demangler::isSymbolName(const char *Mangled) { + if (std::isdigit(*Mangled)) + return true; + + // TODO: Handle symbol back references and template instances. + return false; +} + +const char *Demangler::parseMangle(OutputBuffer *Demangled, + const char *Mangled) { + // A D mangled symbol is comprised of both scope and type information. + // MangleName: + // _D QualifiedName Type + // _D QualifiedName Z + // ^ + // The caller should have guaranteed that the start pointer is at the + // above location. + // Note that type is never a function type, but only the return type of + // a function or the type of a variable. + Mangled += 2; + + Mangled = parseQualified(Demangled, Mangled); + + if (Mangled != nullptr) { + // Artificial symbols end with 'Z' and have no type. 
+ if (*Mangled == 'Z') + ++Mangled; + else { + // TODO: Implement symbols with types. + return nullptr; + } + } + + return Mangled; +} + +const char *Demangler::parseQualified(OutputBuffer *Demangled, + const char *Mangled) { + // Qualified names are identifiers separated by their encoded length. + // Nested functions also encode their argument types without specifying + // what they return. + // QualifiedName: + // SymbolFunctionName + // SymbolFunctionName QualifiedName + // ^ + // SymbolFunctionName: + // SymbolName + // SymbolName TypeFunctionNoReturn + // SymbolName M TypeFunctionNoReturn + // SymbolName M TypeModifiers TypeFunctionNoReturn + // The start pointer should be at the above location. + + // Whether it has more than one symbol + size_t NotFirst = false; + do { + // Skip over anonymous symbols. + if (*Mangled == '0') { + do + ++Mangled; + while (*Mangled == '0'); + + continue; + } + + if (NotFirst) + *Demangled << '.'; + NotFirst = true; + + Mangled = parseIdentifier(Demangled, Mangled); + + } while (Mangled && isSymbolName(Mangled)); + + return Mangled; +} + +const char *Demangler::parseIdentifier(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Len; + + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + // TODO: Parse back references and lengthless template instances. + + const char *Endptr = decodeNumber(Mangled, &Len); + + if (Endptr == nullptr || Len == 0) + return nullptr; + + if (strlen(Endptr) < Len) + return nullptr; + + Mangled = Endptr; + + // TODO: Parse template instances with a length prefix. + + return parseLName(Demangled, Mangled, Len); +} + +const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, + unsigned long Len) { + *Demangled << StringView(Mangled, Len); + Mangled += Len; + + return Mangled; +} + +Demangler::Demangler(const char *Mangled) : Str(Mangled) {} + +const char *Demangler::parseMangle(OutputBuffer *Demangled) { + return parseMangle(Demangled, this->Str); +} char *llvm::dlangDemangle(const char *MangledName) { if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0) @@ -29,8 +267,19 @@ char *llvm::dlangDemangle(const char *MangledName) { if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) return nullptr; - if (strcmp(MangledName, "_Dmain") == 0) + if (strcmp(MangledName, "_Dmain") == 0) { Demangled << "D main"; + } else { + + Demangler D = Demangler(MangledName); + MangledName = D.parseMangle(&Demangled); + + // Check that the entire symbol was successfully demangled. + if (MangledName == nullptr || *MangledName != '\0') { + std::free(Demangled.getBuffer()); + return nullptr; + } + } // OutputBuffer's internal buffer is not null terminated and therefore we need // to add it to comply with C null terminated strings. @@ -40,6 +289,6 @@ char *llvm::dlangDemangle(const char *MangledName) { return Demangled.getBuffer(); } - free(Demangled.getBuffer()); + std::free(Demangled.getBuffer()); return nullptr; } diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index fe3c433bd2c5..a14bd4d2c3fd 100644 --- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -1256,8 +1256,7 @@ void ExecutionEngine::emitGlobals() { // If there are multiple modules, map the non-canonical globals to their // canonical location. 
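// The new DLangDemangle.cpp code above walks D's length-prefixed identifiers:
// a qualified name such as "_D3foo3barZ" is a sequence of <length><chars>
// components joined with '.', and the trailing 'Z' marks an artificial symbol
// with no type. A minimal standalone sketch of that walk for the simple
// "_D ... Z" shape handled so far (no types, back references, or overflow
// handling; illustrative only):
#include <cassert>
#include <cctype>
#include <string>

static bool demangleSimpleD(const std::string &Mangled, std::string &Out) {
  if (Mangled.size() < 3 || Mangled.compare(0, 2, "_D") != 0)
    return false;
  size_t Pos = 2;
  bool First = true;
  while (Pos < Mangled.size() &&
         std::isdigit(static_cast<unsigned char>(Mangled[Pos]))) {
    size_t Len = 0;
    while (Pos < Mangled.size() &&
           std::isdigit(static_cast<unsigned char>(Mangled[Pos])))
      Len = Len * 10 + (Mangled[Pos++] - '0');
    if (Len == 0 || Pos + Len > Mangled.size())
      return false;
    if (!First)
      Out += '.';
    Out.append(Mangled, Pos, Len);
    Pos += Len;
    First = false;
  }
  // An artificial symbol ends with 'Z' and must consume the whole string.
  return Pos + 1 == Mangled.size() && Mangled[Pos] == 'Z';
}

int main() {
  std::string Out;
  assert(demangleSimpleD("_D3foo3barZ", Out) && Out == "foo.bar");
  return 0;
}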
if (!NonCanonicalGlobals.empty()) { - for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) { - const GlobalValue *GV = NonCanonicalGlobals[i]; + for (const GlobalValue *GV : NonCanonicalGlobals) { const GlobalValue *CGV = LinkedGlobalsMap[std::make_pair( std::string(GV->getName()), GV->getType())]; void *Ptr = getPointerToGlobalIfAvailable(CGV); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index fdc987751286..f9101d71dfa8 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -143,6 +143,9 @@ protected: // Only SHF_ALLOC sections will have graph sections. DenseMap<ELFSectionIndex, Section *> GraphSections; DenseMap<ELFSymbolIndex, Symbol *> GraphSymbols; + DenseMap<const typename ELFFile::Elf_Shdr *, + ArrayRef<typename ELFFile::Elf_Word>> + ShndxTables; }; template <typename ELFT> @@ -241,7 +244,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() { return SectionStringTabOrErr.takeError(); // Get the SHT_SYMTAB section. - for (auto &Sec : Sections) + for (auto &Sec : Sections) { if (Sec.sh_type == ELF::SHT_SYMTAB) { if (!SymTabSec) SymTabSec = &Sec; @@ -250,6 +253,20 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() { G->getName()); } + // Extended table. + if (Sec.sh_type == ELF::SHT_SYMTAB_SHNDX) { + uint32_t SymtabNdx = Sec.sh_link; + if (SymtabNdx >= Sections.size()) + return make_error<JITLinkError>("sh_link is out of bound"); + + auto ShndxTable = Obj.getSHNDXTable(Sec); + if (!ShndxTable) + return ShndxTable.takeError(); + + ShndxTables.insert({&Sections[SymtabNdx], *ShndxTable}); + } + } + return Error::success(); } @@ -299,11 +316,6 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() { else Prot = MemProt::Read | MemProt::Write; - // For now we just use this to skip the "undefined" section, probably need - // to revist. - if (Sec.sh_size == 0) - continue; - auto &GraphSec = G->createSection(*Name, Prot); if (Sec.sh_type != ELF::SHT_NOBITS) { auto Data = Obj.template getSectionContentsAsArray<char>(Sec); @@ -401,9 +413,19 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() { (Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC || Sym.getType() == ELF::STT_OBJECT || Sym.getType() == ELF::STT_SECTION || Sym.getType() == ELF::STT_TLS)) { - - // FIXME: Handle extended tables. - if (auto *GraphSec = getGraphSection(Sym.st_shndx)) { + // Handle extended tables. 
+ unsigned Shndx = Sym.st_shndx; + if (Shndx == ELF::SHN_XINDEX) { + auto ShndxTable = ShndxTables.find(SymTabSec); + if (ShndxTable == ShndxTables.end()) + continue; + auto NdxOrErr = object::getExtendedSymbolTableIndex<ELFT>( + Sym, SymIndex, ShndxTable->second); + if (!NdxOrErr) + return NdxOrErr.takeError(); + Shndx = *NdxOrErr; + } + if (auto *GraphSec = getGraphSection(Shndx)) { Block *B = nullptr; { auto Blocks = GraphSec->blocks(); diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 6b24d6461b63..56a97f83d915 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -612,9 +612,14 @@ void LookupState::continueLookup(Error Err) { DefinitionGenerator::~DefinitionGenerator() {} +JITDylib::~JITDylib() { + LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n"); +} + Error JITDylib::clear() { std::vector<ResourceTrackerSP> TrackersToRemove; ES.runSessionLocked([&]() { + assert(State != Closed && "JD is defunct"); for (auto &KV : TrackerSymbols) TrackersToRemove.push_back(KV.first); TrackersToRemove.push_back(getDefaultResourceTracker()); @@ -628,6 +633,7 @@ Error JITDylib::clear() { ResourceTrackerSP JITDylib::getDefaultResourceTracker() { return ES.runSessionLocked([this] { + assert(State != Closed && "JD is defunct"); if (!DefaultTracker) DefaultTracker = new ResourceTracker(this); return DefaultTracker; @@ -636,19 +642,22 @@ ResourceTrackerSP JITDylib::getDefaultResourceTracker() { ResourceTrackerSP JITDylib::createResourceTracker() { return ES.runSessionLocked([this] { + assert(State == Open && "JD is defunct"); ResourceTrackerSP RT = new ResourceTracker(this); return RT; }); } void JITDylib::removeGenerator(DefinitionGenerator &G) { - std::lock_guard<std::mutex> Lock(GeneratorsMutex); - auto I = llvm::find_if(DefGenerators, - [&](const std::shared_ptr<DefinitionGenerator> &H) { - return H.get() == &G; - }); - assert(I != DefGenerators.end() && "Generator not found"); - DefGenerators.erase(I); + ES.runSessionLocked([&] { + assert(State == Open && "JD is defunct"); + auto I = llvm::find_if(DefGenerators, + [&](const std::shared_ptr<DefinitionGenerator> &H) { + return H.get() == &G; + }); + assert(I != DefGenerators.end() && "Generator not found"); + DefGenerators.erase(I); + }); } Expected<SymbolFlagsMap> @@ -708,10 +717,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR, auto Err = ES.runSessionLocked([&, this]() -> Error { - auto RT = getTracker(FromMR); - - if (RT->isDefunct()) - return make_error<ResourceTrackerDefunct>(std::move(RT)); + if (FromMR.RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT)); #ifndef NDEBUG for (auto &KV : MU->getSymbols()) { @@ -735,7 +742,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR, if (MII != MaterializingInfos.end()) { if (MII->second.hasQueriesPending()) { MustRunMR = ES.createMaterializationResponsibility( - *RT, std::move(MU->SymbolFlags), std::move(MU->InitSymbol)); + *FromMR.RT, std::move(MU->SymbolFlags), + std::move(MU->InitSymbol)); MustRunMU = std::move(MU); return Error::success(); } @@ -743,10 +751,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR, } // Otherwise, make MU responsible for all the symbols. 
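// The ELFLinkGraphBuilder hunk above adds support for ELF extended section
// indices: when a symbol's st_shndx field is SHN_XINDEX (0xffff), its real
// section index is read from the parallel SHT_SYMTAB_SHNDX table keyed by the
// symbol's position in the symbol table. A minimal model of that lookup
// (plain vectors instead of llvm::object::ELFFile; illustrative only):
#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint16_t SHN_XINDEX = 0xffff;

static uint32_t resolveSectionIndex(uint16_t StShndx, size_t SymIndex,
                                    const std::vector<uint32_t> &ShndxTable) {
  if (StShndx != SHN_XINDEX)
    return StShndx;                 // ordinary symbols carry the index inline
  return ShndxTable.at(SymIndex);   // one entry per symbol table entry
}

int main() {
  std::vector<uint32_t> ShndxTable = {0, 70000, 0};
  assert(resolveSectionIndex(5, 0, ShndxTable) == 5);
  assert(resolveSectionIndex(SHN_XINDEX, 1, ShndxTable) == 70000);
  return 0;
}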
- auto RTI = MRTrackers.find(&FromMR); - assert(RTI != MRTrackers.end() && "No tracker for FromMR"); - auto UMI = - std::make_shared<UnmaterializedInfo>(std::move(MU), RTI->second); + auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU), + FromMR.RT.get()); for (auto &KV : UMI->MU->getSymbols()) { auto SymI = Symbols.find(KV.first); assert(SymI->second.getState() == SymbolState::Materializing && @@ -787,13 +793,11 @@ JITDylib::delegate(MaterializationResponsibility &FromMR, return ES.runSessionLocked( [&]() -> Expected<std::unique_ptr<MaterializationResponsibility>> { - auto RT = getTracker(FromMR); - - if (RT->isDefunct()) - return make_error<ResourceTrackerDefunct>(std::move(RT)); + if (FromMR.RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT)); return ES.createMaterializationResponsibility( - *RT, std::move(SymbolFlags), std::move(InitSymbol)); + *FromMR.RT, std::move(SymbolFlags), std::move(InitSymbol)); }); } @@ -903,10 +907,13 @@ Error JITDylib::resolve(MaterializationResponsibility &MR, AsynchronousSymbolQuerySet CompletedQueries; if (auto Err = ES.runSessionLocked([&, this]() -> Error { - auto RTI = MRTrackers.find(&MR); - assert(RTI != MRTrackers.end() && "No resource tracker for MR?"); - if (RTI->second->isDefunct()) - return make_error<ResourceTrackerDefunct>(RTI->second); + if (MR.RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(MR.RT); + + if (State != Open) + return make_error<StringError>("JITDylib " + getName() + + " is defunct", + inconvertibleErrorCode()); struct WorklistEntry { SymbolTable::iterator SymI; @@ -1001,10 +1008,13 @@ Error JITDylib::emit(MaterializationResponsibility &MR, DenseMap<JITDylib *, SymbolNameVector> ReadySymbols; if (auto Err = ES.runSessionLocked([&, this]() -> Error { - auto RTI = MRTrackers.find(&MR); - assert(RTI != MRTrackers.end() && "No resource tracker for MR?"); - if (RTI->second->isDefunct()) - return make_error<ResourceTrackerDefunct>(RTI->second); + if (MR.RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(MR.RT); + + if (State != Open) + return make_error<StringError>("JITDylib " + getName() + + " is defunct", + inconvertibleErrorCode()); SymbolNameSet SymbolsInErrorState; std::vector<SymbolTable::iterator> Worklist; @@ -1149,9 +1159,12 @@ Error JITDylib::emit(MaterializationResponsibility &MR, void JITDylib::unlinkMaterializationResponsibility( MaterializationResponsibility &MR) { ES.runSessionLocked([&]() { - auto I = MRTrackers.find(&MR); - assert(I != MRTrackers.end() && "MaterializationResponsibility not linked"); - MRTrackers.erase(I); + auto I = TrackerMRs.find(MR.RT.get()); + assert(I != TrackerMRs.end() && "No MRs in TrackerMRs list for RT"); + assert(I->second.count(&MR) && "MR not in TrackerMRs list for RT"); + I->second.erase(&MR); + if (I->second.empty()) + TrackerMRs.erase(MR.RT.get()); }); } @@ -1169,8 +1182,16 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) { (*FailedSymbolsMap)[&JD].insert(Name); - assert(JD.Symbols.count(Name) && "No symbol table entry for Name"); - auto &Sym = JD.Symbols[Name]; + // Look up the symbol to fail. + auto SymI = JD.Symbols.find(Name); + + // It's possible that this symbol has already been removed, e.g. if a + // materialization failure happens concurrently with a ResourceTracker or + // JITDylib removal. In that case we can safely skip this symbol and + // continue. + if (SymI == JD.Symbols.end()) + continue; + auto &Sym = SymI->second; // Move the symbol into the error state. 
// Note that this may be redundant: The symbol might already have been @@ -1267,6 +1288,7 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) { void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder, bool LinkAgainstThisJITDylibFirst) { ES.runSessionLocked([&]() { + assert(State == Open && "JD is defunct"); if (LinkAgainstThisJITDylibFirst) { LinkOrder.clear(); if (NewLinkOrder.empty() || NewLinkOrder.front().first != this) @@ -1285,6 +1307,7 @@ void JITDylib::addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags) { void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, JITDylibLookupFlags JDLookupFlags) { ES.runSessionLocked([&]() { + assert(State == Open && "JD is defunct"); for (auto &KV : LinkOrder) if (KV.first == &OldJD) { KV = {&NewJD, JDLookupFlags}; @@ -1295,6 +1318,7 @@ void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, void JITDylib::removeFromLinkOrder(JITDylib &JD) { ES.runSessionLocked([&]() { + assert(State == Open && "JD is defunct"); auto I = llvm::find_if(LinkOrder, [&](const JITDylibSearchOrder::value_type &KV) { return KV.first == &JD; @@ -1306,6 +1330,7 @@ void JITDylib::removeFromLinkOrder(JITDylib &JD) { Error JITDylib::remove(const SymbolNameSet &Names) { return ES.runSessionLocked([&]() -> Error { + assert(State == Open && "JD is defunct"); using SymbolMaterializerItrPair = std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>; std::vector<SymbolMaterializerItrPair> SymbolsToRemove; @@ -1365,8 +1390,23 @@ Error JITDylib::remove(const SymbolNameSet &Names) { void JITDylib::dump(raw_ostream &OS) { ES.runSessionLocked([&, this]() { OS << "JITDylib \"" << getName() << "\" (ES: " - << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n" - << "Link order: " << LinkOrder << "\n" + << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) + << ", State = "; + switch (State) { + case Open: + OS << "Open"; + break; + case Closing: + OS << "Closing"; + break; + case Closed: + OS << "Closed"; + break; + } + OS << ")\n"; + if (State == Closed) + return; + OS << "Link order: " << LinkOrder << "\n" << "Symbol table:\n"; for (auto &KV : Symbols) { @@ -1454,17 +1494,11 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name) LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols}); } -ResourceTrackerSP JITDylib::getTracker(MaterializationResponsibility &MR) { - auto I = MRTrackers.find(&MR); - assert(I != MRTrackers.end() && "MR is not linked"); - assert(I->second && "Linked tracker is null"); - return I->second; -} - std::pair<JITDylib::AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>> JITDylib::removeTracker(ResourceTracker &RT) { // Note: Should be called under the session lock. + assert(State != Closed && "JD is defunct"); SymbolNameVector SymbolsToRemove; std::vector<std::pair<JITDylib *, SymbolStringPtr>> SymbolsToFail; @@ -1525,6 +1559,7 @@ JITDylib::removeTracker(ResourceTracker &RT) { } void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) { + assert(State != Closed && "JD is defunct"); assert(&DstRT != &SrcRT && "No-op transfers shouldn't call transferTracker"); assert(&DstRT.getJITDylib() == this && "DstRT is not for this JITDylib"); assert(&SrcRT.getJITDylib() == this && "SrcRT is not for this JITDylib"); @@ -1536,9 +1571,22 @@ void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) { } // Update trackers for any active materialization responsibilities. 
- for (auto &KV : MRTrackers) { - if (KV.second == &SrcRT) - KV.second = &DstRT; + { + auto I = TrackerMRs.find(&SrcRT); + if (I != TrackerMRs.end()) { + auto &SrcMRs = I->second; + auto &DstMRs = TrackerMRs[&DstRT]; + for (auto *MR : SrcMRs) + MR->RT = &DstRT; + if (DstMRs.empty()) + DstMRs = std::move(SrcMRs); + else + for (auto *MR : SrcMRs) + DstMRs.insert(MR); + // Erase SrcRT entry in TrackerMRs. Use &SrcRT key rather than iterator I + // for this, since I may have been invalidated by 'TrackerMRs[&DstRT]'. + TrackerMRs.erase(&SrcRT); + } } // If we're transfering to the default tracker we just need to delete the @@ -1872,6 +1920,40 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) { return JD; } +Error ExecutionSession::removeJITDylib(JITDylib &JD) { + // Keep JD alive throughout this routine, even if all other references + // have been dropped. + JITDylibSP JDKeepAlive = &JD; + + // Set JD to 'Closing' state and remove JD from the ExecutionSession. + runSessionLocked([&] { + assert(JD.State == JITDylib::Open && "JD already closed"); + JD.State = JITDylib::Closing; + auto I = llvm::find(JDs, &JD); + assert(I != JDs.end() && "JD does not appear in session JDs"); + JDs.erase(I); + }); + + // Clear the JITDylib. + auto Err = JD.clear(); + + // Set JD to closed state. Clear remaining data structures. + runSessionLocked([&] { + assert(JD.State == JITDylib::Closing && "JD should be closing"); + JD.State = JITDylib::Closed; + assert(JD.Symbols.empty() && "JD.Symbols is not empty after clear"); + assert(JD.UnmaterializedInfos.empty() && + "JD.UnmaterializedInfos is not empty after clear"); + assert(JD.MaterializingInfos.empty() && + "JD.MaterializingInfos is not empty after clear"); + assert(JD.TrackerSymbols.empty() && + "TrackerSymbols is not empty after clear"); + JD.DefGenerators.clear(); + JD.LinkOrder.clear(); + }); + return Err; +} + std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) { if (JDs.empty()) return {}; @@ -1883,6 +1965,8 @@ std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) { for (auto &JD : JDs) { + assert(JD->State == Open && "JD is defunct"); + if (Visited.count(JD.get())) continue; @@ -2311,8 +2395,11 @@ void ExecutionSession::OL_applyQueryPhase1( }); // Build the definition generator stack for this JITDylib. - for (auto &DG : reverse(JD.DefGenerators)) - IPLS->CurDefGeneratorStack.push_back(DG); + runSessionLocked([&] { + IPLS->CurDefGeneratorStack.reserve(JD.DefGenerators.size()); + for (auto &DG : reverse(JD.DefGenerators)) + IPLS->CurDefGeneratorStack.push_back(DG); + }); // Flag that we've done our initialization. 
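// The Orc Core.cpp changes above replace the old per-MR tracker map with a
// tracker-to-set-of-MRs map (TrackerMRs), so transferTracker() can retarget
// every MaterializationResponsibility owned by SrcRT with one lookup instead
// of scanning all MRs. A container-level sketch of that transfer (plain
// structs instead of the real ResourceTracker/MR types; illustrative only):
#include <cassert>
#include <map>
#include <set>

struct MR {
  int *RT; // stands in for the ResourceTracker pointer held by each MR
};

static void transferTracker(std::map<int *, std::set<MR *>> &TrackerMRs,
                            int *DstRT, int *SrcRT) {
  auto I = TrackerMRs.find(SrcRT);
  if (I == TrackerMRs.end())
    return;
  auto &DstMRs = TrackerMRs[DstRT];
  for (MR *M : I->second) {
    M->RT = DstRT;     // retarget the responsibility
    DstMRs.insert(M);  // and record it under the destination tracker
  }
  // Erase by key rather than iterator: with hash-map style containers the
  // insertion above could have invalidated I, so keying on SrcRT is safer.
  TrackerMRs.erase(SrcRT);
}

int main() {
  int Src = 0, Dst = 0;
  MR A{&Src}, B{&Src};
  std::map<int *, std::set<MR *>> TrackerMRs{{&Src, {&A, &B}}};
  transferTracker(TrackerMRs, &Dst, &Src);
  assert(A.RT == &Dst && B.RT == &Dst);
  assert(TrackerMRs.count(&Src) == 0 && TrackerMRs[&Dst].size() == 2);
  return 0;
}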
IPLS->NewJITDylib = false; @@ -2629,17 +2716,15 @@ void ExecutionSession::OL_completeLookup( LLVM_DEBUG(dbgs() << "Adding MUs to dispatch:\n"); for (auto &KV : CollectedUMIs) { - auto &JD = *KV.first; LLVM_DEBUG({ + auto &JD = *KV.first; dbgs() << " For " << JD.getName() << ": Adding " << KV.second.size() << " MUs.\n"; }); for (auto &UMI : KV.second) { - std::unique_ptr<MaterializationResponsibility> MR( - new MaterializationResponsibility( - &JD, std::move(UMI->MU->SymbolFlags), - std::move(UMI->MU->InitSymbol))); - JD.MRTrackers[MR.get()] = UMI->RT; + auto MR = createMaterializationResponsibility( + *UMI->RT, std::move(UMI->MU->SymbolFlags), + std::move(UMI->MU->InitSymbol)); OutstandingMUs.push_back( std::make_pair(std::move(UMI->MU), std::move(MR))); } @@ -2757,18 +2842,18 @@ void ExecutionSession::OL_destroyMaterializationResponsibility( assert(MR.SymbolFlags.empty() && "All symbols should have been explicitly materialized or failed"); - MR.JD->unlinkMaterializationResponsibility(MR); + MR.JD.unlinkMaterializationResponsibility(MR); } SymbolNameSet ExecutionSession::OL_getRequestedSymbols( const MaterializationResponsibility &MR) { - return MR.JD->getRequestedSymbols(MR.SymbolFlags); + return MR.JD.getRequestedSymbols(MR.SymbolFlags); } Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR, const SymbolMap &Symbols) { LLVM_DEBUG({ - dbgs() << "In " << MR.JD->getName() << " resolving " << Symbols << "\n"; + dbgs() << "In " << MR.JD.getName() << " resolving " << Symbols << "\n"; }); #ifndef NDEBUG for (auto &KV : Symbols) { @@ -2783,15 +2868,16 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR, } #endif - return MR.JD->resolve(MR, Symbols); + return MR.JD.resolve(MR, Symbols); } Error ExecutionSession::OL_notifyEmitted(MaterializationResponsibility &MR) { LLVM_DEBUG({ - dbgs() << "In " << MR.JD->getName() << " emitting " << MR.SymbolFlags << "\n"; + dbgs() << "In " << MR.JD.getName() << " emitting " << MR.SymbolFlags + << "\n"; }); - if (auto Err = MR.JD->emit(MR, MR.SymbolFlags)) + if (auto Err = MR.JD.emit(MR, MR.SymbolFlags)) return Err; MR.SymbolFlags.clear(); @@ -2802,10 +2888,11 @@ Error ExecutionSession::OL_defineMaterializing( MaterializationResponsibility &MR, SymbolFlagsMap NewSymbolFlags) { LLVM_DEBUG({ - dbgs() << "In " << MR.JD->getName() << " defining materializing symbols " + dbgs() << "In " << MR.JD.getName() << " defining materializing symbols " << NewSymbolFlags << "\n"; }); - if (auto AcceptedDefs = MR.JD->defineMaterializing(std::move(NewSymbolFlags))) { + if (auto AcceptedDefs = + MR.JD.defineMaterializing(std::move(NewSymbolFlags))) { // Add all newly accepted symbols to this responsibility object. 
for (auto &KV : *AcceptedDefs) MR.SymbolFlags.insert(KV); @@ -2817,14 +2904,14 @@ Error ExecutionSession::OL_defineMaterializing( void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { LLVM_DEBUG({ - dbgs() << "In " << MR.JD->getName() << " failing materialization for " + dbgs() << "In " << MR.JD.getName() << " failing materialization for " << MR.SymbolFlags << "\n"; }); JITDylib::FailedSymbolsWorklist Worklist; for (auto &KV : MR.SymbolFlags) - Worklist.push_back(std::make_pair(MR.JD.get(), KV.first)); + Worklist.push_back(std::make_pair(&MR.JD, KV.first)); MR.SymbolFlags.clear(); if (Worklist.empty()) @@ -2834,9 +2921,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) { std::shared_ptr<SymbolDependenceMap> FailedSymbols; runSessionLocked([&]() { - auto RTI = MR.JD->MRTrackers.find(&MR); - assert(RTI != MR.JD->MRTrackers.end() && "No tracker for this"); - if (RTI->second->isDefunct()) + // If the tracker is defunct then there's nothing to do here. + if (MR.RT->isDefunct()) return; std::tie(FailedQueries, FailedSymbols) = @@ -2858,12 +2944,12 @@ Error ExecutionSession::OL_replace(MaterializationResponsibility &MR, if (MU->getInitializerSymbol() == MR.InitSymbol) MR.InitSymbol = nullptr; - LLVM_DEBUG(MR.JD->getExecutionSession().runSessionLocked([&]() { - dbgs() << "In " << MR.JD->getName() << " replacing symbols with " << *MU + LLVM_DEBUG(MR.JD.getExecutionSession().runSessionLocked([&]() { + dbgs() << "In " << MR.JD.getName() << " replacing symbols with " << *MU << "\n"; });); - return MR.JD->replace(MR, std::move(MU)); + return MR.JD.replace(MR, std::move(MU)); } Expected<std::unique_ptr<MaterializationResponsibility>> @@ -2886,8 +2972,8 @@ ExecutionSession::OL_delegate(MaterializationResponsibility &MR, MR.SymbolFlags.erase(I); } - return MR.JD->delegate(MR, std::move(DelegatedFlags), - std::move(DelegatedInitSymbol)); + return MR.JD.delegate(MR, std::move(DelegatedFlags), + std::move(DelegatedInitSymbol)); } void ExecutionSession::OL_addDependencies( @@ -2899,7 +2985,7 @@ void ExecutionSession::OL_addDependencies( }); assert(MR.SymbolFlags.count(Name) && "Symbol not covered by this MaterializationResponsibility instance"); - MR.JD->addDependencies(Name, Dependencies); + MR.JD.addDependencies(Name, Dependencies); } void ExecutionSession::OL_addDependenciesForAll( @@ -2910,7 +2996,7 @@ void ExecutionSession::OL_addDependenciesForAll( << Dependencies << "\n"; }); for (auto &KV : MR.SymbolFlags) - MR.JD->addDependencies(KV.first, Dependencies); + MR.JD.addDependencies(KV.first, Dependencies); } #ifndef NDEBUG diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 1b7fdb588275..0de76ab78e0f 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1301,7 +1301,7 @@ RuntimeDyldELF::processRelocationRef( MemMgr.allowStubAllocation()) { resolveAArch64Branch(SectionID, Value, RelI, Stubs); } else if (RelType == ELF::R_AARCH64_ADR_GOT_PAGE) { - // Craete new GOT entry or find existing one. If GOT entry is + // Create new GOT entry or find existing one. If GOT entry is // to be created, then we also emit ABS64 relocation for it. 
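The ExecutionSession::OL_* hunks above now reach the owning JITDylib through a plain reference held by the MaterializationResponsibility. As a reminder of how that API is driven from a materializer, a hedged sketch; the symbol address is a made-up constant and the helper name is illustrative:

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

// Resolve and emit a single absolute symbol, or fail the whole
// responsibility. Each call lands in the corresponding OL_* method above.
static void materializeOneSymbol(
    std::unique_ptr<MaterializationResponsibility> R, SymbolStringPtr Name) {
  SymbolMap Syms;
  Syms[Name] = JITEvaluatedSymbol(0x1000, JITSymbolFlags::Exported);
  if (Error Err = R->notifyResolved(Syms)) {
    R->getTargetJITDylib().getExecutionSession().reportError(std::move(Err));
    R->failMaterialization();
    return;
  }
  if (Error Err = R->notifyEmitted()) {
    R->getTargetJITDylib().getExecutionSession().reportError(std::move(Err));
    R->failMaterialization();
  }
}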
uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64); resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ce998df757ec..18f1a2314853 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -993,6 +993,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Value *ST = ConstantInt::get(I32Ty, 1); llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); + Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); + AllocaIP = Builder.saveIP(); InsertPointTy AfterIP = applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true); BasicBlock *LoopAfterBB = AfterIP.getBlock(); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 7734c0a8de58..c9748e1387eb 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -353,12 +353,11 @@ void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) { // Scan the name to see if it needs quotes first. bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0])); if (!NeedsQuotes) { - for (unsigned i = 0, e = Name.size(); i != e; ++i) { + for (unsigned char C : Name) { // By making this unsigned, the value passed in to isalnum will always be // in the range 0-255. This is important when building with MSVC because // its implementation will assert. This situation can arise when dealing // with UTF-8 multibyte characters. - unsigned char C = Name[i]; if (!isalnum(static_cast<unsigned char>(C)) && C != '-' && C != '.' && C != '_') { NeedsQuotes = true; @@ -1309,27 +1308,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD, bool FromValue = false); static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { - if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) { - // 'Fast' is an abbreviation for all fast-math-flags. 
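The printLLVMNameWithoutPrefix hunk above converts to a range-based loop but deliberately keeps the unsigned char cast before calling isalnum. A standalone illustration of why, with a made-up helper and an example UTF-8 name:

#include <cctype>
#include <string>

// Bytes >= 0x80 become negative when char is signed; passing a negative
// value (other than EOF) to isalnum() is undefined behaviour and asserts
// in MSVC's CRT. Casting through unsigned char keeps the value in 0..255.
static bool needsQuotes(const std::string &Name) {
  for (char C : Name)
    if (!std::isalnum(static_cast<unsigned char>(C)) && C != '-' &&
        C != '.' && C != '_')
      return true;
  return false;
}

// needsQuotes("caf\xc3\xa9") is well-defined and returns true.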
- if (FPO->isFast()) - Out << " fast"; - else { - if (FPO->hasAllowReassoc()) - Out << " reassoc"; - if (FPO->hasNoNaNs()) - Out << " nnan"; - if (FPO->hasNoInfs()) - Out << " ninf"; - if (FPO->hasNoSignedZeros()) - Out << " nsz"; - if (FPO->hasAllowReciprocal()) - Out << " arcp"; - if (FPO->hasAllowContract()) - Out << " contract"; - if (FPO->hasApproxFunc()) - Out << " afn"; - } - } + if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) + Out << FPO->getFastMathFlags(); if (const OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(U)) { diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 905372982dc2..2c396ae97499 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -2266,6 +2266,14 @@ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, unwrap<Constant>(Aliasee), unwrap(M))); } +LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy, + unsigned AddrSpace, LLVMValueRef Aliasee, + const char *Name) { + return wrap(GlobalAlias::create(unwrap(ValueTy), AddrSpace, + GlobalValue::ExternalLinkage, Name, + unwrap<Constant>(Aliasee), unwrap(M))); +} + LLVMValueRef LLVMGetNamedGlobalAlias(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getNamedAlias(Name)); diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index ca7dafc814ce..548962bd6a98 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -34,7 +34,20 @@ static cl::opt<bool> DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU) : M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr), ValueFn(nullptr), LabelFn(nullptr), - AllowUnresolvedNodes(AllowUnresolvedNodes) {} + AllowUnresolvedNodes(AllowUnresolvedNodes) { + if (CUNode) { + if (const auto &ETs = CUNode->getEnumTypes()) + AllEnumTypes.assign(ETs.begin(), ETs.end()); + if (const auto &RTs = CUNode->getRetainedTypes()) + AllRetainTypes.assign(RTs.begin(), RTs.end()); + if (const auto &GVs = CUNode->getGlobalVariables()) + AllGVs.assign(GVs.begin(), GVs.end()); + if (const auto &IMs = CUNode->getImportedEntities()) + AllImportedModules.assign(IMs.begin(), IMs.end()); + if (const auto &MNs = CUNode->getMacros()) + AllMacrosPerParent.insert({nullptr, {MNs.begin(), MNs.end()}}); + } +} void DIBuilder::trackIfUnresolved(MDNode *N) { if (!N) diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index c42df49d97ea..ad27a6d8c08e 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2474,7 +2474,7 @@ bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask, // Additionally, mask size is a replication factor multiplied by vector size, // which further significantly reduces the search space. - // Before doing that, let's perform basic sanity check first. + // Before doing that, let's perform basic correctness checking first. int Largest = -1; for (int MaskElt : Mask) { if (MaskElt == UndefMaskElem) diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 7552906fd07a..9206cd37a6d1 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -358,13 +358,13 @@ Value *VPIntrinsic::getMemoryPointerParam() const { Optional<unsigned> VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) { switch (VPID) { default: - return None; - -#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \ - case Intrinsic::VPID: \ - return POINTERPOS; + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) 
case Intrinsic::VPID: +#define VP_PROPERTY_MEMOP(POINTERPOS, ...) return POINTERPOS; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } + return None; } /// \return The data (payload) operand of this store or scatter. @@ -378,52 +378,51 @@ Value *VPIntrinsic::getMemoryDataParam() const { Optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) { switch (VPID) { default: - return None; - -#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \ - case Intrinsic::VPID: \ - return DATAPOS; + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) return DATAPOS; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } + return None; } bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) { switch (ID) { default: - return false; - + break; #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ - break; + return true; #include "llvm/IR/VPIntrinsics.def" } - return true; + return false; } // Equivalent non-predicated opcode Optional<unsigned> VPIntrinsic::getFunctionalOpcodeForVP(Intrinsic::ID ID) { - Optional<unsigned> FunctionalOC; switch (ID) { default: break; #define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: -#define HANDLE_VP_TO_OPC(OPC) FunctionalOC = Instruction::OPC; -#define END_REGISTER_VP_INTRINSIC(...) break; +#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return Instruction::OPC; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } - - return FunctionalOC; + return None; } Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) { switch (IROPC) { default: - return Intrinsic::not_intrinsic; + break; -#define HANDLE_VP_TO_OPC(OPC) case Instruction::OPC: +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) break; +#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) case Instruction::OPC: #define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID; #include "llvm/IR/VPIntrinsics.def" } + return Intrinsic::not_intrinsic; } bool VPIntrinsic::canIgnoreVectorLengthParam() const { @@ -516,13 +515,13 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID, bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) { switch (ID) { default: - return false; -#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \ - case Intrinsic::VPID: \ break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } - return true; + return false; } unsigned VPReductionIntrinsic::getVectorParamPos() const { @@ -535,24 +534,26 @@ unsigned VPReductionIntrinsic::getStartParamPos() const { Optional<unsigned> VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) { switch (ID) { -#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \ - case Intrinsic::VPID: \ - return VECTORPOS; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return VECTORPOS; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" default: - return None; + break; } + return None; } Optional<unsigned> VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) { switch (ID) { -#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \ - case Intrinsic::VPID: \ - return STARTPOS; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) 
case Intrinsic::VPID: +#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return STARTPOS; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" default: - return None; + break; } + return None; } Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const { diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index cf309ffd6212..d15fcfbc5b9f 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -226,4 +226,25 @@ bool GEPOperator::collectOffset( } return true; } + +void FastMathFlags::print(raw_ostream &O) const { + if (all()) + O << " fast"; + else { + if (allowReassoc()) + O << " reassoc"; + if (noNaNs()) + O << " nnan"; + if (noInfs()) + O << " ninf"; + if (noSignedZeros()) + O << " nsz"; + if (allowReciprocal()) + O << " arcp"; + if (allowContract()) + O << " contract"; + if (approxFunc()) + O << " afn"; + } +} } // namespace llvm diff --git a/llvm/lib/IR/PassTimingInfo.cpp b/llvm/lib/IR/PassTimingInfo.cpp index d0c1517f480b..a03fafec9fac 100644 --- a/llvm/lib/IR/PassTimingInfo.cpp +++ b/llvm/lib/IR/PassTimingInfo.cpp @@ -187,7 +187,7 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) { Timer *T = new Timer(PassID, FullDesc, TG); Timers.emplace_back(T); - assert(Count == Timers.size() && "sanity check"); + assert(Count == Timers.size() && "Timers vector not adjusted correctly."); return *T; } diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp index 9be6de693ee3..2117527a64f0 100644 --- a/llvm/lib/IR/SafepointIRVerifier.cpp +++ b/llvm/lib/IR/SafepointIRVerifier.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// // -// Run a sanity check on the IR to ensure that Safepoints - if they've been -// inserted - were inserted correctly. In particular, look for use of -// non-relocated values after a safepoint. It's primary use is to check the +// Run a basic correctness check on the IR to ensure that Safepoints - if +// they've been inserted - were inserted correctly. In particular, look for use +// of non-relocated values after a safepoint. It's primary use is to check the // correctness of safepoint insertion immediately after insertion, but it can // also be used to verify that later transforms have not found a way to break // safepoint semenatics. diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index dc4370d4b6ed..154b59835b01 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defines the function verifier interface, that can be used for some -// sanity checking of input to the system. +// basic correctness checking of input to the system. // // Note that this does not provide full `Java style' security and verifications, // instead it just tries to ensure that code is well-formed. @@ -1604,7 +1604,7 @@ Verifier::visitModuleFlag(const MDNode *Op, Assert(ID, "invalid ID operand in module flag (expected metadata string)", Op->getOperand(1)); - // Sanity check the values for behaviors with additional requirements. + // Check the values for behaviors with additional requirements. 
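The AsmWriter and Operator.cpp hunks above centralize fast-math-flag printing in FastMathFlags::print, so any printer can reuse the same " fast" / " reassoc nnan ..." spelling. A small hedged sketch of a caller; the function name is illustrative:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print the fast-math flags of an instruction, if it is an FP operation.
// print() emits " fast" when every flag is set, otherwise the individual
// " reassoc", " nnan", " ninf", ... tokens, matching the IR printer.
static void printFMF(const Instruction &I, raw_ostream &OS) {
  if (const auto *FPO = dyn_cast<FPMathOperator>(&I)) {
    FPO->getFastMathFlags().print(OS);
    OS << "\n";
  }
}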
switch (MFB) { case Module::Error: case Module::Warning: @@ -5269,24 +5269,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Op0ElemTy = cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType(); break; - case Intrinsic::matrix_column_major_load: + case Intrinsic::matrix_column_major_load: { Stride = dyn_cast<ConstantInt>(Call.getArgOperand(1)); NumRows = cast<ConstantInt>(Call.getArgOperand(3)); NumColumns = cast<ConstantInt>(Call.getArgOperand(4)); ResultTy = cast<VectorType>(Call.getType()); - Op0ElemTy = - cast<PointerType>(Call.getArgOperand(0)->getType())->getElementType(); + + PointerType *Op0PtrTy = + cast<PointerType>(Call.getArgOperand(0)->getType()); + if (!Op0PtrTy->isOpaque()) + Op0ElemTy = Op0PtrTy->getElementType(); break; - case Intrinsic::matrix_column_major_store: + } + case Intrinsic::matrix_column_major_store: { Stride = dyn_cast<ConstantInt>(Call.getArgOperand(2)); NumRows = cast<ConstantInt>(Call.getArgOperand(4)); NumColumns = cast<ConstantInt>(Call.getArgOperand(5)); ResultTy = cast<VectorType>(Call.getArgOperand(0)->getType()); Op0ElemTy = cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType(); - Op1ElemTy = - cast<PointerType>(Call.getArgOperand(1)->getType())->getElementType(); + + PointerType *Op1PtrTy = + cast<PointerType>(Call.getArgOperand(1)->getType()); + if (!Op1PtrTy->isOpaque()) + Op1ElemTy = Op1PtrTy->getElementType(); break; + } default: llvm_unreachable("unexpected intrinsic"); } @@ -5295,9 +5303,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { ResultTy->getElementType()->isFloatingPointTy(), "Result type must be an integer or floating-point type!", IF); - Assert(ResultTy->getElementType() == Op0ElemTy, - "Vector element type mismatch of the result and first operand " - "vector!", IF); + if (Op0ElemTy) + Assert(ResultTy->getElementType() == Op0ElemTy, + "Vector element type mismatch of the result and first operand " + "vector!", IF); if (Op1ElemTy) Assert(ResultTy->getElementType() == Op1ElemTy, diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp index d41c7d3217d7..0d1a864f31ac 100644 --- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp +++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp @@ -372,7 +372,7 @@ Error appendToError(Error Err, StringRef After) { /// This function populates a DynamicEntries struct using an ELFT::DynRange. /// After populating the struct, the members are validated with -/// some basic sanity checks. +/// some basic correctness checks. /// /// @param Dyn Target DynamicEntries struct to populate. /// @param DynTable Source dynamic table. 
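The Verifier change above only compares the matrix pointer's pointee type when the pointer is still typed; with opaque pointers there is no element type to check. The same guard, reduced to a standalone helper (the name is illustrative):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Return V's pointee type when it is known, or nullptr when V has an
// opaque pointer type (or is not a pointer at all).
static Type *getPointeeTypeIfKnown(const Value *V) {
  if (auto *PtrTy = dyn_cast<PointerType>(V->getType()))
    if (!PtrTy->isOpaque())
      return PtrTy->getElementType();
  return nullptr;
}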
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 154b2d051f34..2ca921017171 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -1069,16 +1069,14 @@ void MCAsmStreamer::PrintQuotedString(StringRef Data, raw_ostream &OS) const { OS << '"'; if (MAI->hasPairedDoubleQuoteStringConstants()) { - for (unsigned i = 0, e = Data.size(); i != e; ++i) { - unsigned char C = Data[i]; + for (unsigned char C : Data) { if (C == '"') OS << "\"\""; else OS << (char)C; } } else { - for (unsigned i = 0, e = Data.size(); i != e; ++i) { - unsigned char C = Data[i]; + for (unsigned char C : Data) { if (C == '"' || C == '\\') { OS << '\\' << (char)C; continue; diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 1ba999a63113..fbf3c860368a 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -646,8 +646,6 @@ void MCELFStreamer::emitBundleAlignMode(unsigned AlignPow2) { void MCELFStreamer::emitBundleLock(bool AlignToEnd) { MCSection &Sec = *getCurrentSectionOnly(); - // Sanity checks - // if (!getAssembler().isBundlingEnabled()) report_fatal_error(".bundle_lock forbidden when bundling is disabled"); @@ -667,7 +665,6 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) { void MCELFStreamer::emitBundleUnlock() { MCSection &Sec = *getCurrentSectionOnly(); - // Sanity checks if (!getAssembler().isBundlingEnabled()) report_fatal_error(".bundle_unlock forbidden when bundling is disabled"); else if (!isBundleLocked()) diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index ddc41d0a08ab..e95019c12db7 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -676,14 +676,14 @@ EndStmt: getContext().getELFSection(SectionName, Type, Flags, Size, GroupName, IsComdat, UniqueID, LinkedToSym); getStreamer().SwitchSection(Section, Subsection); - if (Section->getType() != Type && + // Check that flags are used consistently. However, the GNU assembler permits + // to leave out in subsequent uses of the same sections; for compatibility, + // do likewise. + if (!TypeName.empty() && Section->getType() != Type && !allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName, Type)) Error(loc, "changed section type for " + SectionName + ", expected: 0x" + utohexstr(Section->getType())); - // Check that flags are used consistently. However, the GNU assembler permits - // to leave out in subsequent uses of the same sections; for compatibility, - // do likewise. 
if ((extraFlags || Size || !TypeName.empty()) && Section->getFlags() != Flags) Error(loc, "changed section flags for " + SectionName + ", expected: 0x" + utohexstr(Section->getFlags())); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 646f416821ae..73c687331d30 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -56,6 +56,8 @@ using llvm::support::endian::write32le; namespace { +constexpr int OffsetLabelIntervalBits = 20; + using name = SmallString<COFF::NameSize>; enum AuxiliaryType { @@ -120,6 +122,8 @@ public: relocations Relocations; COFFSection(StringRef Name) : Name(std::string(Name)) {} + + SmallVector<COFFSymbol *, 1> OffsetSymbols; }; class WinCOFFObjectWriter : public MCObjectWriter { @@ -149,6 +153,7 @@ public: symbol_list WeakDefaults; bool UseBigObj; + bool UseOffsetLabels = false; bool EmitAddrsigSection = false; MCSectionCOFF *AddrsigSection; @@ -174,7 +179,7 @@ public: COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol); COFFSection *createSection(StringRef Name); - void defineSection(MCSectionCOFF const &Sec); + void defineSection(MCSectionCOFF const &Sec, const MCAsmLayout &Layout); COFFSymbol *getLinkedSymbol(const MCSymbol &Symbol); void DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler, @@ -244,6 +249,11 @@ WinCOFFObjectWriter::WinCOFFObjectWriter( std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) { Header.Machine = TargetObjectWriter->getMachine(); + // Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly + // limited range for the immediate offset (+/- 1 MB); create extra offset + // label symbols with regular intervals to allow referencing a + // non-temporary symbol that is close enough. + UseOffsetLabels = Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64; } COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) { @@ -299,7 +309,8 @@ static uint32_t getAlignment(const MCSectionCOFF &Sec) { /// This function takes a section data object from the assembler /// and creates the associated COFF section staging object. -void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) { +void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec, + const MCAsmLayout &Layout) { COFFSection *Section = createSection(MCSec.getName()); COFFSymbol *Symbol = createSymbol(MCSec.getName()); Section->Symbol = Symbol; @@ -329,6 +340,20 @@ void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) { // Bind internal COFF section to MC section. Section->MCSection = &MCSec; SectionMap[&MCSec] = Section; + + if (UseOffsetLabels && !MCSec.getFragmentList().empty()) { + const uint32_t Interval = 1 << OffsetLabelIntervalBits; + uint32_t N = 1; + for (uint32_t Off = Interval, E = Layout.getSectionAddressSize(&MCSec); + Off < E; Off += Interval) { + auto Name = ("$L" + MCSec.getName() + "_" + Twine(N++)).str(); + COFFSymbol *Label = createSymbol(Name); + Label->Section = Section; + Label->Data.StorageClass = COFF::IMAGE_SYM_CLASS_LABEL; + Label->Data.Value = Off; + Section->OffsetSymbols.push_back(Label); + } + } } static uint64_t getSymbolValue(const MCSymbol &Symbol, @@ -688,7 +713,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, // "Define" each section & symbol. This creates section & symbol // entries in the staging area. 
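The WinCOFFObjectWriter hunks above add per-section offset labels on ARM64 because, as the new comment notes, the 21-bit ADRP-style relocations only reach about +/- 1 MiB from the symbol they are written against. With OffsetLabelIntervalBits = 20 a label lands every 2^20 bytes, named "$L<section>_<N>". A standalone sketch of that placement; the section name and size below are made up:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Mirror the label placement in defineSection(): one label per 1 MiB of
// section data, starting at offset 0x100000.
static std::vector<std::pair<std::string, uint64_t>>
placeOffsetLabels(const std::string &SecName, uint64_t SecSize) {
  const uint64_t Interval = uint64_t(1) << 20; // OffsetLabelIntervalBits
  std::vector<std::pair<std::string, uint64_t>> Labels;
  unsigned N = 1;
  for (uint64_t Off = Interval; Off < SecSize; Off += Interval)
    Labels.push_back({"$L" + SecName + "_" + std::to_string(N++), Off});
  return Labels;
}

// placeOffsetLabels(".text", 0x380000) yields $L.text_1 at 0x100000,
// $L.text_2 at 0x200000 and $L.text_3 at 0x300000.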
for (const auto &Section : Asm) - defineSection(static_cast<const MCSectionCOFF &>(Section)); + defineSection(static_cast<const MCSectionCOFF &>(Section), Layout); for (const MCSymbol &Symbol : Asm.symbols()) if (!Symbol.isTemporary()) @@ -774,8 +799,23 @@ void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm, assert( SectionMap.find(TargetSection) != SectionMap.end() && "Section must already have been defined in executePostLayoutBinding!"); - Reloc.Symb = SectionMap[TargetSection]->Symbol; + COFFSection *Section = SectionMap[TargetSection]; + Reloc.Symb = Section->Symbol; FixedValue += Layout.getSymbolOffset(A); + // Technically, we should do the final adjustments of FixedValue (below) + // before picking an offset symbol, otherwise we might choose one which + // is slightly too far away. The relocations where it really matters + // (arm64 adrp relocations) don't get any offset though. + if (UseOffsetLabels && !Section->OffsetSymbols.empty()) { + uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits; + if (LabelIndex > 0) { + if (LabelIndex <= Section->OffsetSymbols.size()) + Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1]; + else + Reloc.Symb = Section->OffsetSymbols.back(); + FixedValue -= Reloc.Symb->Data.Value; + } + } } else { assert( SymbolMap.find(&A) != SymbolMap.end() && diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index 0ab845a4c28f..d8283f8d2682 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -612,7 +612,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) { LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); - // Sanity check on the instruction descriptor. + // Validation check on the instruction descriptor. if (Error Err = verifyInstrDesc(*ID, MCI)) return std::move(Err); diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp index 6e021d3d9232..2b11f73b19df 100644 --- a/llvm/lib/MCA/Stages/ExecuteStage.cpp +++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp @@ -188,7 +188,7 @@ Error ExecuteStage::execute(InstRef &IR) { #ifndef NDEBUG // Ensure that the HWS has not stored this instruction in its queues. 
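recordRelocation (in the hunk above) then rewrites a section-relative relocation against the nearest preceding offset label: the label index is FixedValue >> 20, and the chosen label's own value is folded out of the addend. A worked sketch with an assumed fixup offset of 0x345678:

#include <cstdint>
#include <utility>

// Return the offset-label index to relocate against (0 means keep the
// section symbol) and the addend that remains relative to that label.
static std::pair<uint64_t, uint64_t> pickOffsetLabel(uint64_t FixedValue) {
  const int OffsetLabelIntervalBits = 20;
  uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
  if (LabelIndex == 0)
    return {0, FixedValue};
  uint64_t LabelValue = LabelIndex << OffsetLabelIntervalBits;
  return {LabelIndex, FixedValue - LabelValue};
}

// pickOffsetLabel(0x345678) == {3, 0x45678}: the fixup becomes
// $L<sec>_3 (at 0x300000) plus 0x45678, comfortably within +/- 1 MiB.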
- HWS.sanityCheck(IR); + HWS.instructionCheck(IR); #endif if (IR.getInstruction()->isEliminated()) diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp index 50035d6c7523..cf1f12d9a9a7 100644 --- a/llvm/lib/Object/ELFObjectFile.cpp +++ b/llvm/lib/Object/ELFObjectFile.cpp @@ -682,7 +682,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF, std::vector<VersionEntry> Ret; size_t I = 0; - for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) { + for (const ELFSymbolRef &Sym : Symbols) { ++I; Expected<const typename ELFT::Versym *> VerEntryOrErr = EF.template getEntry<typename ELFT::Versym>(*VerSec, I); @@ -691,7 +691,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF, " from " + describe(EF, *VerSec) + ": " + toString(VerEntryOrErr.takeError())); - Expected<uint32_t> FlagsOrErr = It->getFlags(); + Expected<uint32_t> FlagsOrErr = Sym.getFlags(); if (!FlagsOrErr) return createError("unable to read flags for symbol with index " + Twine(I) + ": " + toString(FlagsOrErr.takeError())); diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp index 5f38ca13cfc2..66ad16db1ba4 100644 --- a/llvm/lib/ObjectYAML/COFFEmitter.cpp +++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp @@ -476,29 +476,25 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) { assert(OS.tell() == CP.SectionTableStart); // Output section table. - for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(), - e = CP.Obj.Sections.end(); - i != e; ++i) { - OS.write(i->Header.Name, COFF::NameSize); - OS << binary_le(i->Header.VirtualSize) - << binary_le(i->Header.VirtualAddress) - << binary_le(i->Header.SizeOfRawData) - << binary_le(i->Header.PointerToRawData) - << binary_le(i->Header.PointerToRelocations) - << binary_le(i->Header.PointerToLineNumbers) - << binary_le(i->Header.NumberOfRelocations) - << binary_le(i->Header.NumberOfLineNumbers) - << binary_le(i->Header.Characteristics); + for (const COFFYAML::Section &S : CP.Obj.Sections) { + OS.write(S.Header.Name, COFF::NameSize); + OS << binary_le(S.Header.VirtualSize) + << binary_le(S.Header.VirtualAddress) + << binary_le(S.Header.SizeOfRawData) + << binary_le(S.Header.PointerToRawData) + << binary_le(S.Header.PointerToRelocations) + << binary_le(S.Header.PointerToLineNumbers) + << binary_le(S.Header.NumberOfRelocations) + << binary_le(S.Header.NumberOfLineNumbers) + << binary_le(S.Header.Characteristics); } assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize); unsigned CurSymbol = 0; StringMap<unsigned> SymbolTableIndexMap; - for (std::vector<COFFYAML::Symbol>::iterator I = CP.Obj.Symbols.begin(), - E = CP.Obj.Symbols.end(); - I != E; ++I) { - SymbolTableIndexMap[I->Name] = CurSymbol; - CurSymbol += 1 + I->Header.NumberOfAuxSymbols; + for (const COFFYAML::Symbol &Sym : CP.Obj.Symbols) { + SymbolTableIndexMap[Sym.Name] = CurSymbol; + CurSymbol += 1 + Sym.Header.NumberOfAuxSymbols; } // Output section data. diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index fdf9aeae1622..e0dde4433d24 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -155,6 +155,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration( ECase(NT_FREEBSD_PROCSTAT_OSREL); ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS); ECase(NT_FREEBSD_PROCSTAT_AUXV); + // NetBSD core note types. + ECase(NT_NETBSDCORE_PROCINFO); + ECase(NT_NETBSDCORE_AUXV); + ECase(NT_NETBSDCORE_LWPSTATUS); // OpenBSD core note types. 
ECase(NT_OPENBSD_PROCINFO); ECase(NT_OPENBSD_AUXV); diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index c653c29ec9a7..e5ffb12df434 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -54,6 +54,7 @@ private: void writeNameList(raw_ostream &OS); void writeStringTable(raw_ostream &OS); void writeExportTrie(raw_ostream &OS); + void writeDynamicSymbolTable(raw_ostream &OS); void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry); void ZeroToOffset(raw_ostream &OS, size_t offset); @@ -482,6 +483,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) { MachO::dyld_info_command *DyldInfoOnlyCmd = 0; MachO::symtab_command *SymtabCmd = 0; + MachO::dysymtab_command *DSymtabCmd = 0; for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SYMTAB: @@ -504,6 +506,11 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) { WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off, &MachOWriter::writeExportTrie)); break; + case MachO::LC_DYSYMTAB: + DSymtabCmd = &LC.Data.dysymtab_command_data; + WriteQueue.push_back(std::make_pair( + DSymtabCmd->indirectsymoff, &MachOWriter::writeDynamicSymbolTable)); + break; } } @@ -556,6 +563,12 @@ void MachOWriter::writeStringTable(raw_ostream &OS) { } } +void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) { + for (auto Data : Obj.LinkEdit.IndirectSymbols) + OS.write(reinterpret_cast<const char *>(&Data), + sizeof(yaml::Hex32::BaseType)); +} + class UniversalWriter { public: UniversalWriter(yaml::YamlObjectFile &ObjectFile) diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index c9562bd72258..f32009458110 100644 --- a/llvm/lib/ObjectYAML/MachOYAML.cpp +++ b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -164,6 +164,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping( IO.mapOptional("ExportTrie", LinkEditData.ExportTrie); IO.mapOptional("NameList", LinkEditData.NameList); IO.mapOptional("StringTable", LinkEditData.StringTable); + IO.mapOptional("IndirectSymbols", LinkEditData.IndirectSymbols); } void MappingTraits<MachOYAML::RebaseOpcode>::mapping( diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 37c2fcbab181..19e05b9272bb 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -150,10 +150,9 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase) for (StringSet<>::const_iterator I = PrefixesUnion.begin(), E = PrefixesUnion.end(); I != E; ++I) { StringRef Prefix = I->getKey(); - for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end(); - C != CE; ++C) - if (!is_contained(PrefixChars, *C)) - PrefixChars.push_back(*C); + for (char C : Prefix) + if (!is_contained(PrefixChars, C)) + PrefixChars.push_back(C); } } diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index ac5dfdbdd540..de1b0ace7876 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1765,6 +1765,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); + return MPM; } diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 8e6be6730ea4..27a6c519ff82 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp 
@@ -225,8 +225,8 @@ std::string doSystemDiff(StringRef Before, StringRef After, return "Unable to read result."; // Clean up. - for (unsigned I = 0; I < NumFiles; ++I) { - std::error_code EC = sys::fs::remove(FileName[I]); + for (const std::string &I : FileName) { + std::error_code EC = sys::fs::remove(I); if (EC) return "Unable to remove temporary file."; } diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 94bd4807041d..c6691e321b3c 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -83,7 +83,6 @@ Error RawCoverageReader::readIntMax(uint64_t &Result, uint64_t MaxPlus1) { Error RawCoverageReader::readSize(uint64_t &Result) { if (auto Err = readULEB128(Result)) return Err; - // Sanity check the number. if (Result > Data.size()) return make_error<CoverageMapError>(coveragemap_error::malformed); return Error::success(); diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 1168ad27fe52..ab3487ecffe8 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -657,19 +657,18 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, Input.sortByTargetValues(); auto I = ValueData.begin(); auto IE = ValueData.end(); - for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; - ++J) { - while (I != IE && I->Value < J->Value) + for (const InstrProfValueData &J : Input.ValueData) { + while (I != IE && I->Value < J.Value) ++I; - if (I != IE && I->Value == J->Value) { + if (I != IE && I->Value == J.Value) { bool Overflowed; - I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); + I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed); if (Overflowed) Warn(instrprof_error::counter_overflow); ++I; continue; } - ValueData.insert(I, *J); + ValueData.insert(I, J); } } diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index b4e8025dbef9..885c1fe49240 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -62,7 +62,6 @@ InstrProfReader::create(const Twine &Path) { Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { - // Sanity check the buffer. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) return make_error<InstrProfError>(instrprof_error::too_large); @@ -113,7 +112,6 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { Expected<std::unique_ptr<IndexedInstrProfReader>> IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, std::unique_ptr<MemoryBuffer> RemappingBuffer) { - // Sanity check the buffer. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) return make_error<InstrProfError>(instrprof_error::too_large); diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp new file mode 100644 index 000000000000..f8d13c74fac3 --- /dev/null +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -0,0 +1,121 @@ +//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. +// +//===----------------------------------------------------------------------===// + +#include <cstdint> +#include <type_traits> + +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProfData.inc" +#include "llvm/ProfileData/RawMemProfReader.h" + +namespace llvm { +namespace memprof { +namespace { + +struct Summary { + uint64_t Version; + uint64_t TotalSizeBytes; + uint64_t NumSegments; + uint64_t NumMIBInfo; + uint64_t NumStackOffsets; +}; + +template <class T = uint64_t> inline T alignedRead(const char *Ptr) { + static_assert(std::is_pod<T>::value, "Not a pod type."); + assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); + return *reinterpret_cast<const T *>(Ptr); +} + +Summary computeSummary(const char *Start) { + auto *H = reinterpret_cast<const Header *>(Start); + + // Check alignment while reading the number of items in each section. + return Summary{ + H->Version, + H->TotalSize, + alignedRead(Start + H->SegmentOffset), + alignedRead(Start + H->MIBOffset), + alignedRead(Start + H->StackOffset), + }; +} + +} // namespace + +Expected<std::unique_ptr<RawMemProfReader>> +RawMemProfReader::create(const Twine &Path) { + auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); + if (std::error_code EC = BufferOr.getError()) + return errorCodeToError(EC); + + std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); + + if (Buffer->getBufferSize() == 0) + return make_error<InstrProfError>(instrprof_error::empty_raw_profile); + + if (!RawMemProfReader::hasFormat(*Buffer)) + return make_error<InstrProfError>(instrprof_error::bad_magic); + + if (Buffer->getBufferSize() < sizeof(Header)) { + return make_error<InstrProfError>(instrprof_error::truncated); + } + + // The size of the buffer can be > header total size since we allow repeated + // serialization of memprof profiles to the same file. + uint64_t TotalSize = 0; + const char *Next = Buffer->getBufferStart(); + while (Next < Buffer->getBufferEnd()) { + auto *H = reinterpret_cast<const Header *>(Next); + if (H->Version != MEMPROF_RAW_VERSION) { + return make_error<InstrProfError>(instrprof_error::unsupported_version); + } + + TotalSize += H->TotalSize; + Next += H->TotalSize; + } + + if (Buffer->getBufferSize() != TotalSize) { + return make_error<InstrProfError>(instrprof_error::malformed); + } + + return std::make_unique<RawMemProfReader>(std::move(Buffer)); +} + +bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { + if (Buffer.getBufferSize() < sizeof(uint64_t)) + return false; + // Aligned read to sanity check that the buffer was allocated with at least 8b + // alignment. 
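RawMemProfReader::create (above) accepts a buffer holding one or more back-to-back raw memprof profiles and checks that the per-header TotalSize values add up to the buffer size. A hedged usage sketch; the profile path is hypothetical, and printSummaries is the dump routine defined in the rest of this file:

#include "llvm/ADT/Twine.h"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Open a raw memprof profile and print one summary block per header.
static void dumpMemProf(const Twine &Path) {
  auto ReaderOr = memprof::RawMemProfReader::create(Path);
  if (!ReaderOr) {
    logAllUnhandledErrors(ReaderOr.takeError(), errs(), "memprof: ");
    return;
  }
  (*ReaderOr)->printSummaries(outs());
}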
+ const uint64_t Magic = alignedRead(Buffer.getBufferStart()); + return Magic == MEMPROF_RAW_MAGIC_64; +} + +void RawMemProfReader::printSummaries(raw_ostream &OS) const { + int Count = 0; + const char *Next = DataBuffer->getBufferStart(); + while (Next < DataBuffer->getBufferEnd()) { + auto Summary = computeSummary(Next); + OS << "MemProf Profile " << ++Count << "\n"; + OS << " Version: " << Summary.Version << "\n"; + OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n"; + OS << " NumSegments: " << Summary.NumSegments << "\n"; + OS << " NumMIBInfo: " << Summary.NumMIBInfo << "\n"; + OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n"; + // TODO: Print the build ids once we can record them using the + // sanitizer_procmaps library for linux. + + auto *H = reinterpret_cast<const Header *>(Next); + Next += H->TotalSize; + } +} + +} // namespace memprof +} // namespace llvm diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index c99a19020511..eefb7c2ba627 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -1709,7 +1709,7 @@ setupMemoryBuffer(const Twine &Filename) { return EC; auto Buffer = std::move(BufferOrErr.get()); - // Sanity check the file. + // Check the file. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max()) return sampleprof_error::too_large; diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp index b3136a91e7f5..a3e41ccd199c 100644 --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -240,52 +240,4 @@ AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) { return C.ArchID; } return ArchKind::INVALID; -} - -// Parse a branch protection specification, which has the form -// standard | none | [bti,pac-ret[+b-key,+leaf]*] -// Returns true on success, with individual elements of the specification -// returned in `PBP`. Returns false in error, with `Err` containing -// an erroneous part of the spec. -bool AArch64::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, - StringRef &Err) { - PBP = {"none", "a_key", false}; - if (Spec == "none") - return true; // defaults are ok - - if (Spec == "standard") { - PBP.Scope = "non-leaf"; - PBP.BranchTargetEnforcement = true; - return true; - } - - SmallVector<StringRef, 4> Opts; - Spec.split(Opts, "+"); - for (int I = 0, E = Opts.size(); I != E; ++I) { - StringRef Opt = Opts[I].trim(); - if (Opt == "bti") { - PBP.BranchTargetEnforcement = true; - continue; - } - if (Opt == "pac-ret") { - PBP.Scope = "non-leaf"; - for (; I + 1 != E; ++I) { - StringRef PACOpt = Opts[I + 1].trim(); - if (PACOpt == "leaf") - PBP.Scope = "all"; - else if (PACOpt == "b-key") - PBP.Key = "b_key"; - else - break; - } - continue; - } - if (Opt == "") - Err = "<empty>"; - else - Err = Opt; - return false; - } - - return true; -} +}
\ No newline at end of file diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp index 459691923af8..241cfb1eedbe 100644 --- a/llvm/lib/Support/ARMAttributeParser.cpp +++ b/llvm/lib/Support/ARMAttributeParser.cpp @@ -59,6 +59,10 @@ const ARMAttributeParser::DisplayHandler ARMAttributeParser::displayRoutines[] = ATTRIBUTE_HANDLER(DSP_extension), ATTRIBUTE_HANDLER(T2EE_use), ATTRIBUTE_HANDLER(Virtualization_use), + ATTRIBUTE_HANDLER(PAC_extension), + ATTRIBUTE_HANDLER(BTI_extension), + ATTRIBUTE_HANDLER(PACRET_use), + ATTRIBUTE_HANDLER(BTI_use), ATTRIBUTE_HANDLER(nodefaults), }; @@ -350,6 +354,28 @@ Error ARMAttributeParser::Virtualization_use(AttrType tag) { return parseStringAttribute("Virtualization_use", tag, makeArrayRef(strings)); } +Error ARMAttributeParser::PAC_extension(ARMBuildAttrs::AttrType tag) { + static const char *strings[] = {"Not Permitted", "Permitted in NOP space", + "Permitted"}; + return parseStringAttribute("PAC_extension", tag, makeArrayRef(strings)); +} + +Error ARMAttributeParser::BTI_extension(ARMBuildAttrs::AttrType tag) { + static const char *strings[] = {"Not Permitted", "Permitted in NOP space", + "Permitted"}; + return parseStringAttribute("BTI_extension", tag, makeArrayRef(strings)); +} + +Error ARMAttributeParser::PACRET_use(ARMBuildAttrs::AttrType tag) { + static const char *strings[] = {"Not Used", "Used"}; + return parseStringAttribute("PACRET_use", tag, makeArrayRef(strings)); +} + +Error ARMAttributeParser::BTI_use(ARMBuildAttrs::AttrType tag) { + static const char *strings[] = {"Not Used", "Used"}; + return parseStringAttribute("BTI_use", tag, makeArrayRef(strings)); +} + Error ARMAttributeParser::nodefaults(AttrType tag) { uint64_t value = de.getULEB128(cursor); printAttribute(tag, value, "Unspecified Tags UNDEFINED"); diff --git a/llvm/lib/Support/ARMBuildAttrs.cpp b/llvm/lib/Support/ARMBuildAttrs.cpp index f20521f2a2d4..815cfc62a4b0 100644 --- a/llvm/lib/Support/ARMBuildAttrs.cpp +++ b/llvm/lib/Support/ARMBuildAttrs.cpp @@ -50,6 +50,10 @@ static const TagNameItem tagData[] = { {ARMBuildAttrs::MPextension_use, "Tag_MPextension_use"}, {ARMBuildAttrs::DIV_use, "Tag_DIV_use"}, {ARMBuildAttrs::DSP_extension, "Tag_DSP_extension"}, + {ARMBuildAttrs::PAC_extension, "Tag_PAC_extension"}, + {ARMBuildAttrs::BTI_extension, "Tag_BTI_extension"}, + {ARMBuildAttrs::BTI_use, "Tag_BTI_use"}, + {ARMBuildAttrs::PACRET_use, "Tag_PACRET_use"}, {ARMBuildAttrs::nodefaults, "Tag_nodefaults"}, {ARMBuildAttrs::also_compatible_with, "Tag_also_compatible_with"}, {ARMBuildAttrs::T2EE_use, "Tag_T2EE_use"}, diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index e64934aa90cc..5b7004c86f5a 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2656,10 +2656,13 @@ cl::getRegisteredSubcommands() { void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) { initCommonOptions(); for (auto &I : Sub.OptionsMap) { + bool Unrelated = true; for (auto &Cat : I.second->Categories) { - if (Cat != &Category && Cat != &CommonOptions->GenericCategory) - I.second->setHiddenFlag(cl::ReallyHidden); + if (Cat == &Category || Cat == &CommonOptions->GenericCategory) + Unrelated = false; } + if (Unrelated) + I.second->setHiddenFlag(cl::ReallyHidden); } } @@ -2667,11 +2670,14 @@ void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories, SubCommand &Sub) { initCommonOptions(); for (auto &I : Sub.OptionsMap) { + bool Unrelated = true; for (auto &Cat : 
I.second->Categories) { - if (!is_contained(Categories, Cat) && - Cat != &CommonOptions->GenericCategory) - I.second->setHiddenFlag(cl::ReallyHidden); + if (is_contained(Categories, Cat) || + Cat == &CommonOptions->GenericCategory) + Unrelated = false; } + if (Unrelated) + I.second->setHiddenFlag(cl::ReallyHidden); } } diff --git a/llvm/lib/Support/HTTPClient.cpp b/llvm/lib/Support/HTTPClient.cpp new file mode 100644 index 000000000000..68ba56d1fe50 --- /dev/null +++ b/llvm/lib/Support/HTTPClient.cpp @@ -0,0 +1,97 @@ +//===-- llvm/Support/HTTPClient.cpp - HTTP client library -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// This file defines the methods of the HTTPRequest, HTTPClient, and +/// BufferedHTTPResponseHandler classes. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/HTTPClient.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +HTTPRequest::HTTPRequest(StringRef Url) { this->Url = Url.str(); } + +bool operator==(const HTTPRequest &A, const HTTPRequest &B) { + return A.Url == B.Url && A.Method == B.Method && + A.FollowRedirects == B.FollowRedirects; +} + +HTTPResponseHandler::~HTTPResponseHandler() = default; + +static inline bool parseContentLengthHeader(StringRef LineRef, + size_t &ContentLength) { + // Content-Length is a mandatory header, and the only one we handle. + return LineRef.consume_front("Content-Length: ") && + to_integer(LineRef.trim(), ContentLength, 10); +} + +Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) { + if (ResponseBuffer.Body) + return Error::success(); + + size_t ContentLength; + if (parseContentLengthHeader(HeaderLine, ContentLength)) + ResponseBuffer.Body = + WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength); + + return Error::success(); +} + +Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { + if (!ResponseBuffer.Body) + return createStringError(errc::io_error, + "Unallocated response buffer. 
HTTP Body data " + "received before Content-Length header."); + if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize()) + return createStringError(errc::io_error, + "Content size exceeds buffer size."); + memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(), + BodyChunk.size()); + Offset += BodyChunk.size(); + return Error::success(); +} + +Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) { + ResponseBuffer.Code = Code; + return Error::success(); +} + +Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) { + BufferedHTTPResponseHandler Handler; + if (Error Err = perform(Request, Handler)) + return std::move(Err); + return std::move(Handler.ResponseBuffer); +} + +Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) { + HTTPRequest Request(Url); + return perform(Request); +} + +HTTPClient::HTTPClient() = default; + +HTTPClient::~HTTPClient() = default; + +bool HTTPClient::isAvailable() { return false; } + +void HTTPClient::cleanup() {} + +void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {} + +Error HTTPClient::perform(const HTTPRequest &Request, + HTTPResponseHandler &Handler) { + llvm_unreachable("No HTTP Client implementation available."); +} diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 90483817c302..554e3248524c 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -421,11 +421,10 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS, "Self multiplication knownbits mismatch"); // Compute a conservative estimate for high known-0 bits. - unsigned LeadZ = - std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(), - BitWidth) - - BitWidth; - LeadZ = std::min(LeadZ, BitWidth); + unsigned LHSLeadZ = LHS.countMinLeadingZeros(); + unsigned RHSLeadZ = RHS.countMinLeadingZeros(); + unsigned LeadZ = std::max(LHSLeadZ + RHSLeadZ, BitWidth) - BitWidth; + assert(LeadZ <= BitWidth && "More zeros than bits?"); // The result of the bottom bits of an integer multiply can be // inferred by looking at the bottom bits of both operands and diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index 0d5cc1c00db1..7a804a1a2297 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -218,10 +218,10 @@ bool Regex::isLiteralERE(StringRef Str) { std::string Regex::escape(StringRef String) { std::string RegexStr; - for (unsigned i = 0, e = String.size(); i != e; ++i) { - if (strchr(RegexMetachars, String[i])) + for (char C : String) { + if (strchr(RegexMetachars, C)) RegexStr += '\\'; - RegexStr += String[i]; + RegexStr += C; } return RegexStr; diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp index 8abf9f7ce0f1..5683d7005584 100644 --- a/llvm/lib/Support/StringExtras.cpp +++ b/llvm/lib/Support/StringExtras.cpp @@ -60,8 +60,7 @@ void llvm::SplitString(StringRef Source, } void llvm::printEscapedString(StringRef Name, raw_ostream &Out) { - for (unsigned i = 0, e = Name.size(); i != e; ++i) { - unsigned char C = Name[i]; + for (unsigned char C : Name) { if (C == '\\') Out << '\\' << C; else if (isPrint(C) && C != '"') diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index c532a1abe906..652303fdb6a0 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -227,8 +227,8 @@ size_t StringRef::rfind_insensitive(StringRef Str) const { StringRef::size_type StringRef::find_first_of(StringRef Chars, size_t From) const { 
std::bitset<1 << CHAR_BIT> CharBits; - for (size_type i = 0; i != Chars.size(); ++i) - CharBits.set((unsigned char)Chars[i]); + for (char C : Chars) + CharBits.set((unsigned char)C); for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (CharBits.test((unsigned char)Data[i])) @@ -252,8 +252,8 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { StringRef::size_type StringRef::find_first_not_of(StringRef Chars, size_t From) const { std::bitset<1 << CHAR_BIT> CharBits; - for (size_type i = 0; i != Chars.size(); ++i) - CharBits.set((unsigned char)Chars[i]); + for (char C : Chars) + CharBits.set((unsigned char)C); for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (!CharBits.test((unsigned char)Data[i])) @@ -268,8 +268,8 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars, StringRef::size_type StringRef::find_last_of(StringRef Chars, size_t From) const { std::bitset<1 << CHAR_BIT> CharBits; - for (size_type i = 0; i != Chars.size(); ++i) - CharBits.set((unsigned char)Chars[i]); + for (char C : Chars) + CharBits.set((unsigned char)C); for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) if (CharBits.test((unsigned char)Data[i])) @@ -293,8 +293,8 @@ StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { StringRef::size_type StringRef::find_last_not_of(StringRef Chars, size_t From) const { std::bitset<1 << CHAR_BIT> CharBits; - for (size_type i = 0, e = Chars.size(); i != e; ++i) - CharBits.set((unsigned char)Chars[i]); + for (char C : Chars) + CharBits.set((unsigned char)C); for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) if (!CharBits.test((unsigned char)Data[i])) diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index 1dadce4b9040..4acc23dd455b 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -333,3 +333,51 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind, } // namespace RISCV } // namespace llvm + +// Parse a branch protection specification, which has the form +// standard | none | [bti,pac-ret[+b-key,+leaf]*] +// Returns true on success, with individual elements of the specification +// returned in `PBP`. Returns false in error, with `Err` containing +// an erroneous part of the spec. 
+bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, + StringRef &Err) { + PBP = {"none", "a_key", false}; + if (Spec == "none") + return true; // defaults are ok + + if (Spec == "standard") { + PBP.Scope = "non-leaf"; + PBP.BranchTargetEnforcement = true; + return true; + } + + SmallVector<StringRef, 4> Opts; + Spec.split(Opts, "+"); + for (int I = 0, E = Opts.size(); I != E; ++I) { + StringRef Opt = Opts[I].trim(); + if (Opt == "bti") { + PBP.BranchTargetEnforcement = true; + continue; + } + if (Opt == "pac-ret") { + PBP.Scope = "non-leaf"; + for (; I + 1 != E; ++I) { + StringRef PACOpt = Opts[I + 1].trim(); + if (PACOpt == "leaf") + PBP.Scope = "all"; + else if (PACOpt == "b-key") + PBP.Key = "b_key"; + else + break; + } + continue; + } + if (Opt == "") + Err = "<empty>"; + else + Err = Opt; + return false; + } + + return true; +} diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 81926d8071b2..c11e16d3cf98 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -29,7 +29,7 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S) Threads.emplace_back([S, ThreadID, this] { S.apply_thread_strategy(ThreadID); while (true) { - PackagedTaskTy Task; + std::function<void()> Task; { std::unique_lock<std::mutex> LockGuard(QueueLock); // Wait for tasks to be pushed in the queue @@ -80,23 +80,6 @@ bool ThreadPool::isWorkerThread() const { return false; } -std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { - /// Wrap the Task in a packaged_task to return a future object. - PackagedTaskTy PackagedTask(std::move(Task)); - auto Future = PackagedTask.get_future(); - { - // Lock the queue and push the new task - std::unique_lock<std::mutex> LockGuard(QueueLock); - - // Don't allow enqueueing after disabling the pool - assert(EnableFlag && "Queuing a thread during ThreadPool destruction"); - - Tasks.push(std::move(PackagedTask)); - } - QueueCondition.notify_one(); - return Future.share(); -} - // The destructor joins all threads, waiting for completion. ThreadPool::~ThreadPool() { { @@ -128,16 +111,6 @@ void ThreadPool::wait() { } } -std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) { - // Get a Future with launch::deferred execution using std::async - auto Future = std::async(std::launch::deferred, std::move(Task)).share(); - // Wrap the future so that both ThreadPool::wait() can operate and the - // returned future can be sync'ed on. 
- PackagedTaskTy PackagedTask([Future]() { Future.get(); }); - Tasks.push(std::move(PackagedTask)); - return Future; -} - ThreadPool::~ThreadPool() { wait(); } #endif diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 2acac63ce843..25079fe33edb 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -1017,12 +1017,10 @@ void TGLexer::prepSkipToLineEnd() { } bool TGLexer::prepIsProcessingEnabled() { - for (auto I = PrepIncludeStack.back()->rbegin(), - E = PrepIncludeStack.back()->rend(); - I != E; ++I) { - if (!I->IsDefined) + for (const PreprocessorControlDesc &I : + llvm::reverse(*PrepIncludeStack.back())) + if (!I.IsDefined) return false; - } return true; } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 9f527a17d390..aeebb49675b2 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -818,18 +818,9 @@ void AArch64AsmPrinter::emitJumpTableInfo() { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - const Function &F = MF->getFunction(); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - bool JTInDiffSection = - !STI->isTargetCOFF() || - !TLOF.shouldPutJumpTableInFunctionSection( - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, - F); - if (JTInDiffSection) { - // Drop it in the readonly section. - MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM); - OutStreamer->SwitchSection(ReadOnlySec); - } + MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM); + OutStreamer->SwitchSection(ReadOnlySec); auto AFI = MF->getInfo<AArch64FunctionInfo>(); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp index 533ab3b05de9..ff4a4dfc1b95 100644 --- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -88,12 +88,9 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, // If this is already the flag setting version of the instruction (e.g., SUBS) // just make sure the implicit-def of NZCV isn't marked dead. 
if (IsFlagSetting) { - for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); - I != E; ++I) { - MachineOperand &MO = MI.getOperand(I); + for (MachineOperand &MO : MI.implicit_operands()) if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) MO.setIsDead(false); - } return &MI; } bool Is64Bit; @@ -104,8 +101,8 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpc), NewDestReg); - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - MIB.add(MI.getOperand(I)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + MIB.add(MO); return MIB; } diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 4c04e04a7d3c..ee6e670fe3cd 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -102,9 +102,8 @@ INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI) { const MCInstrDesc &Desc = OldMI.getDesc(); - for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; - ++i) { - const MachineOperand &MO = OldMI.getOperand(i); + for (const MachineOperand &MO : + llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) { assert(MO.isReg() && MO.getReg()); if (MO.isUse()) UseMI.add(MO); @@ -733,8 +732,9 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER( MOP.getReg(), /*Def=*/false, /*Implicit=*/true)); RegMaskStartIdx++; } - for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx) - OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx)); + for (const MachineOperand &MO : + llvm::drop_begin(MI.operands(), RegMaskStartIdx)) + OriginalCall->addOperand(MO); auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs)) .addReg(AArch64::FP, RegState::Define) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6e9e61c8e7ac..72461aa1f772 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -890,7 +890,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::SUB); - setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::XOR); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); @@ -930,6 +929,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::VECREDUCE_ADD); setTargetDAGCombine(ISD::STEP_VECTOR); + setTargetDAGCombine(ISD::FP_EXTEND); + setTargetDAGCombine(ISD::GlobalAddress); // In case of strict alignment, avoid an excessive number of byte wide stores. @@ -1323,6 +1324,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); } setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom); @@ -1504,6 +1506,24 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) { } } +bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT, + EVT OpVT) const { + // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo). 
+ if (!Subtarget->hasSVE()) + return true; + + // We can only support legal predicate result types. + if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 && + ResVT != MVT::nxv16i1) + return true; + + // The whilelo instruction only works with i32 or i64 scalar inputs. + if (OpVT != MVT::i32 && OpVT != MVT::i64) + return true; + + return false; +} + void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); @@ -1528,7 +1548,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { setCondCodeAction(ISD::SETUNE, VT, Expand); } - // Mark integer truncating stores as having custom lowering + // Mark integer truncating stores/extending loads as having custom lowering if (VT.isInteger()) { MVT InnerVT = VT.changeVectorElementType(MVT::i8); while (InnerVT != VT) { @@ -1540,6 +1560,18 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { } } + // Mark floating-point truncating stores/extending loads as having custom + // lowering + if (VT.isFloatingPoint()) { + MVT InnerVT = VT.changeVectorElementType(MVT::f16); + while (InnerVT != VT) { + setTruncStoreAction(VT, InnerVT, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom); + InnerVT = InnerVT.changeVectorElementType( + MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits())); + } + } + // Lower fixed length vector operations to scalable equivalents. setOperationAction(ISD::ABS, VT, Custom); setOperationAction(ISD::ADD, VT, Custom); @@ -1950,6 +1982,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::UDIV_PRED) MAKE_CASE(AArch64ISD::UMAX_PRED) MAKE_CASE(AArch64ISD::UMIN_PRED) + MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1) MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU) MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU) @@ -2316,6 +2349,8 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V); static SDValue convertFixedMaskToScalableVector(SDValue Mask, SelectionDAG &DAG); +static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, + EVT VT); /// isZerosVector - Check whether SDNode N is a zero-filled vector. 
static bool isZerosVector(const SDNode *N) { @@ -4288,6 +4323,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } + case Intrinsic::get_active_lane_mask: { + SDValue ID = + DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID, + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -4506,7 +4547,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op, } InputVT = DAG.getValueType(MemVT.changeTypeToInteger()); Mask = DAG.getNode( - ISD::ZERO_EXTEND, DL, + ISD::SIGN_EXTEND, DL, VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); } @@ -4618,7 +4659,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op, VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal); StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal); Mask = DAG.getNode( - ISD::ZERO_EXTEND, DL, + ISD::SIGN_EXTEND, DL, VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask); } else if (VT.isFloatingPoint()) { // Handle FP data by casting the data so an integer scatter can be used. @@ -10963,8 +11004,40 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, return SDValue(); } +static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) { + if (Op.getOpcode() != AArch64ISD::DUP && + Op.getOpcode() != ISD::SPLAT_VECTOR && + Op.getOpcode() != ISD::BUILD_VECTOR) + return false; + + if (Op.getOpcode() == ISD::BUILD_VECTOR && + !isAllConstantBuildVector(Op, SplatVal)) + return false; + + if (Op.getOpcode() != ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(Op->getOperand(0))) + return false; + + SplatVal = Op->getConstantOperandVal(0); + if (Op.getValueType().getVectorElementType() != MVT::i64) + SplatVal = (int32_t)SplatVal; + + Negated = false; + if (isPowerOf2_64(SplatVal)) + return true; + + Negated = true; + if (isPowerOf2_64(-SplatVal)) { + SplatVal = -SplatVal; + return true; + } + + return false; +} + SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + SDLoc dl(Op); if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) return LowerFixedLengthVectorIntDivideToSVE(Op, DAG); @@ -10974,6 +11047,19 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { bool Signed = Op.getOpcode() == ISD::SDIV; unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; + bool Negated; + uint64_t SplatVal; + if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) { + SDValue Pg = getPredicateForScalableVector(DAG, dl, VT); + SDValue Res = + DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0), + DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32)); + if (Negated) + Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res); + + return Res; + } + if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) return LowerToPredicatedOp(Op, DAG, PredOpcode); @@ -10987,7 +11073,6 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { else llvm_unreachable("Unexpected Custom DIV operation"); - SDLoc dl(Op); unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; unsigned UnpkHi = Signed ? 
AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0)); @@ -11924,6 +12009,12 @@ static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) { return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2); } +static bool isSplatShuffle(Value *V) { + if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) + return is_splat(Shuf->getShuffleMask()); + return false; +} + /// Check if sinking \p I's operands to I's basic block is profitable, because /// the operands can be folded into a target instruction, e.g. /// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2). @@ -11934,12 +12025,24 @@ bool AArch64TargetLowering::shouldSinkOperands( if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { + case Intrinsic::aarch64_neon_smull: case Intrinsic::aarch64_neon_umull: - if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) - return false; - Ops.push_back(&II->getOperandUse(0)); - Ops.push_back(&II->getOperandUse(1)); - return true; + if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) { + Ops.push_back(&II->getOperandUse(0)); + Ops.push_back(&II->getOperandUse(1)); + return true; + } + LLVM_FALLTHROUGH; + + case Intrinsic::aarch64_neon_sqdmull: + case Intrinsic::aarch64_neon_sqdmulh: + case Intrinsic::aarch64_neon_sqrdmulh: + // Sink splats for index lane variants + if (isSplatShuffle(II->getOperand(0))) + Ops.push_back(&II->getOperandUse(0)); + if (isSplatShuffle(II->getOperand(1))) + Ops.push_back(&II->getOperandUse(1)); + return !Ops.empty(); case Intrinsic::aarch64_neon_pmull64: if (!areOperandsOfVmullHighP64(II->getArgOperand(0), @@ -12961,8 +13064,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV - // fold (sdiv X, pow2) EVT VT = N->getValueType(0); + + // For scalable and fixed types, mark them as cheap so we can handle it much + // later. This allows us to handle larger than legal types. + if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors()) + return SDValue(N, 0); + + // fold (sdiv X, pow2) if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2())) return SDValue(); @@ -13858,34 +13967,6 @@ static SDValue performANDCombine(SDNode *N, return SDValue(); } -static SDValue performSRLCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the - // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32) - // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero. 
- SDValue N0 = N->getOperand(0); - if (N0.getOpcode() == ISD::BSWAP) { - SDLoc DL(N); - SDValue N1 = N->getOperand(1); - SDValue N00 = N0.getOperand(0); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { - uint64_t ShiftAmt = C->getZExtValue(); - if (VT == MVT::i32 && ShiftAmt == 16 && - DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16))) - return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); - if (VT == MVT::i64 && ShiftAmt == 32 && - DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32))) - return DAG.getNode(ISD::ROTR, DL, VT, N0, N1); - } - } - return SDValue(); -} - // Attempt to form urhadd(OpA, OpB) from // truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)) // or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)). @@ -14031,6 +14112,9 @@ static SDValue performConcatVectorsCombine(SDNode *N, SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode(); + if (VT.isScalableVector()) + return SDValue(); + // Optimize concat_vectors of truncated vectors, where the intermediate // type is illegal, to avoid said illegality, e.g., // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))), @@ -15089,6 +15173,9 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_sve_uqsub_x: return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_sve_asrd: + return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), @@ -15883,6 +15970,22 @@ static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // If this is an FP_ROUND followed by a store, fold this into a truncating + // store. We can do this even if this is already a truncstore. + // We purposefully don't care about legality of the nodes here as we know + // they can be split down into something legal. + if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND && + Value.getNode()->hasOneUse() && ST->isUnindexed() && + Subtarget->useSVEForFixedLengthVectors() && + Value.getValueType().isFixedLengthVector()) + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getMemoryVT(), ST->getMemOperand()); + if (SDValue Split = splitStores(N, DCI, DAG, Subtarget)) return Split; @@ -17225,6 +17328,37 @@ SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) { return DAG.getBitcast(Ty, Trunc); } +SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + // We purposefully don't care about legality of the nodes here as we know + // they can be split down into something legal. 
+ if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) && + N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() && + VT.isFixedLengthVector()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, + LN0->getChain(), LN0->getBasePtr(), + N0.getValueType(), LN0->getMemOperand()); + DCI.CombineTo(N, ExtLoad); + DCI.CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), + ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + return SDValue(); +} + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -17253,8 +17387,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performORCombine(N, DCI, Subtarget); case ISD::AND: return performANDCombine(N, DCI); - case ISD::SRL: - return performSRLCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return performIntrinsicCombine(N, DCI, Subtarget); case ISD::ANY_EXTEND: @@ -17283,6 +17415,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performSTORECombine(N, DCI, DAG, Subtarget); case ISD::VECTOR_SPLICE: return performSVESpliceCombine(N, DAG); + case ISD::FP_EXTEND: + return performFPExtendCombine(N, DAG, DCI, Subtarget); case AArch64ISD::BRCOND: return performBRCONDCombine(N, DCI, DAG); case AArch64ISD::TBNZ: @@ -18414,6 +18548,15 @@ bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } +bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, + EVT VT) const { + // v8f16 without fp16 need to be extended to v8f32, which is more difficult to + // legalize. + if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16()) + return false; + return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT); +} + bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const { return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint(); } @@ -18591,12 +18734,29 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE( SDLoc DL(Op); EVT VT = Op.getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + EVT LoadVT = ContainerVT; + EVT MemVT = Load->getMemoryVT(); + + auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT); + + if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) { + LoadVT = ContainerVT.changeTypeToInteger(); + MemVT = MemVT.changeTypeToInteger(); + } auto NewLoad = DAG.getMaskedLoad( - ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), - getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT), - Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(), - Load->getExtensionType()); + LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg, + DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(), + Load->getAddressingMode(), Load->getExtensionType()); + + if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) { + EVT ExtendVT = ContainerVT.changeVectorElementType( + Load->getMemoryVT().getVectorElementType()); + + NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG); + NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT, + Pg, NewLoad, DAG.getUNDEF(ContainerVT)); + } auto Result = convertFromScalableVector(DAG, VT, NewLoad); SDValue MergedValues[2] = {Result, Load->getChain()}; @@ -18609,12 +18769,15 @@ static SDValue convertFixedMaskToScalableVector(SDValue 
Mask, EVT InVT = Mask.getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT); + auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT); + + if (ISD::isBuildVectorAllOnes(Mask.getNode())) + return Pg; + auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask); auto Op2 = DAG.getConstant(0, DL, ContainerVT); - auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT); - EVT CmpVT = Pg.getValueType(); - return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT, + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(), {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)}); } @@ -18668,13 +18831,26 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE( SDLoc DL(Op); EVT VT = Store->getValue().getValueType(); EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + EVT MemVT = Store->getMemoryVT(); + auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT); auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue()); - return DAG.getMaskedStore( - Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(), - getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(), - Store->getMemOperand(), Store->getAddressingMode(), - Store->isTruncatingStore()); + + if (VT.isFloatingPoint() && Store->isTruncatingStore()) { + EVT TruncVT = ContainerVT.changeVectorElementType( + Store->getMemoryVT().getVectorElementType()); + MemVT = MemVT.changeTypeToInteger(); + NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg, + NewValue, DAG.getTargetConstant(0, DL, MVT::i64), + DAG.getUNDEF(TruncVT)); + NewValue = + getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG); + } + + return DAG.getMaskedStore(Store->getChain(), DL, NewValue, + Store->getBasePtr(), Store->getOffset(), Pg, MemVT, + Store->getMemOperand(), Store->getAddressingMode(), + Store->isTruncatingStore()); } SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE( @@ -18706,6 +18882,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE( bool Signed = Op.getOpcode() == ISD::SDIV; unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; + bool Negated; + uint64_t SplatVal; + if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) { + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0)); + SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32); + + SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT); + SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2); + if (Negated) + Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res); + + return convertFromScalableVector(DAG, VT, Res); + } + // Scalable vector i32/i64 DIV is supported. if (EltVT == MVT::i32 || EltVT == MVT::i64) return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 392e22b68366..ea884cdccd28 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -104,6 +104,8 @@ enum NodeType : unsigned { // Unpredicated vector instructions BIC, + SRAD_MERGE_OP1, + // Predicated instructions with the result of inactive lanes provided by the // last operand. 
FABS_MERGE_PASSTHRU, @@ -774,6 +776,8 @@ public: bool preferIncOfAddToSubOfNot(EVT VT) const override; + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; + bool hasBitPreservingFPLogic(EVT VT) const override { // FIXME: Is this always true? It should be true for vectors at least. return VT == MVT::f32 || VT == MVT::f64; @@ -842,6 +846,8 @@ public: EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown = false) const override; + bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; + private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index db8e0c5dac4a..decee117d2d5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -437,6 +437,18 @@ def non_temporal_store : cast<MaskedStoreSDNode>(N)->isNonTemporal(); }]>; +// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise +def top16Zero: PatLeaf<(i32 GPR32:$src), [{ + return SDValue(N,0)->getValueType(0) == MVT::i32 && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + }]>; + +// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise +def top32Zero: PatLeaf<(i64 GPR64:$src), [{ + return SDValue(N,0)->getValueType(0) == MVT::i64 && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32)); + }]>; + // Node definitions. def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>; @@ -2046,6 +2058,10 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; +// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero. +def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>; +def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>; + //===----------------------------------------------------------------------===// // Bitfield immediate extraction instruction. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 67d8fbb45cf5..25d53f4ab065 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -199,6 +199,13 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>; +def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, + SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> +]>; + +def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>; + def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4> @@ -1575,7 +1582,7 @@ let Predicates = [HasSVEorStreamingSVE] in { defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>; defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>; @@ -1586,7 +1593,7 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in { defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>; defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>; defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>; - defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>; + defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>; } // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos let Predicates = [HasSVEorStreamingSVE] in { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 63d6fa5bbb26..34015d2dbd49 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -833,17 +833,12 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, return match(SplatValue, m_FPOne()) || match(SplatValue, m_One()); }; - // The OpMultiplier variable should always point to the dup (if any), so - // swap if necessary. - if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand)) - std::swap(OpMultiplier, OpMultiplicand); - if (IsUnitSplat(OpMultiplier)) { - // [f]mul pg (dupx 1) %n => %n + // [f]mul pg %n, (dupx 1) => %n OpMultiplicand->takeName(&II); return IC.replaceInstUsesWith(II, OpMultiplicand); } else if (IsUnitDup(OpMultiplier)) { - // [f]mul pg (dup pg 1) %n => %n + // [f]mul pg %n, (dup pg 1) => %n auto *DupInst = cast<IntrinsicInst>(OpMultiplier); auto *DupPg = DupInst->getOperand(1); // TODO: this is naive. 
The optimization is still valid if DupPg @@ -2142,6 +2137,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction( case RecurKind::FMax: case RecurKind::SelectICmp: case RecurKind::SelectFCmp: + case RecurKind::FMulAdd: return true; default: return false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index e090d87d59a2..3d9a626d3ac3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1920,35 +1920,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { - case TargetOpcode::G_SHL: - case TargetOpcode::G_ASHR: - case TargetOpcode::G_LSHR: { - // These shifts are legalized to have 64 bit shift amounts because we want - // to take advantage of the existing imported selection patterns that assume - // the immediates are s64s. However, if the shifted type is 32 bits and for - // some reason we receive input GMIR that has an s64 shift amount that's not - // a G_CONSTANT, insert a truncate so that we can still select the s32 - // register-register variant. - Register SrcReg = I.getOperand(1).getReg(); - Register ShiftReg = I.getOperand(2).getReg(); - const LLT ShiftTy = MRI.getType(ShiftReg); - const LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy.isVector()) - return false; - assert(!ShiftTy.isVector() && "unexpected vector shift ty"); - if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64) - return false; - auto *AmtMI = MRI.getVRegDef(ShiftReg); - assert(AmtMI && "could not find a vreg definition for shift amount"); - if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) { - // Insert a subregister copy to implement a 64->32 trunc - auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) - .addReg(ShiftReg, 0, AArch64::sub_32); - MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); - I.getOperand(2).setReg(Trunc.getReg(0)); - } - return true; - } case TargetOpcode::G_STORE: { bool Changed = contractCrossBankCopyIntoStore(I, MRI); MachineOperand &SrcOp = I.getOperand(0); @@ -2950,6 +2921,28 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (Opcode == TargetOpcode::G_SHL && MRI.getType(I.getOperand(0).getReg()).isVector()) return selectVectorSHL(I, MRI); + + // These shifts were legalized to have 64 bit shift amounts because we + // want to take advantage of the selection patterns that assume the + // immediates are s64s, however, selectBinaryOp will assume both operands + // will have the same bit size. 
+ { + Register SrcReg = I.getOperand(1).getReg(); + Register ShiftReg = I.getOperand(2).getReg(); + const LLT ShiftTy = MRI.getType(ShiftReg); + const LLT SrcTy = MRI.getType(SrcReg); + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && + ShiftTy.getSizeInBits() == 64) { + assert(!ShiftTy.isVector() && "unexpected vector shift ty"); + assert(MRI.getVRegDef(ShiftReg) && + "could not find a vreg definition for shift amount"); + // Insert a subregister copy to implement a 64->32 trunc + auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) + .addReg(ShiftReg, 0, AArch64::sub_32); + MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); + I.getOperand(2).setReg(Trunc.getReg(0)); + } + } LLVM_FALLTHROUGH; case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: @@ -6452,8 +6445,7 @@ static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder MIB(MI); // Go through each operand and ensure it has the same regbank. - for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { - MachineOperand &MO = MI.getOperand(OpIdx); + for (MachineOperand &MO : llvm::drop_begin(MI.operands())) { if (!MO.isReg()) continue; Register OpReg = MO.getReg(); @@ -6511,8 +6503,7 @@ void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { // %endbb: // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 bool HasGPROp = false, HasFPROp = false; - for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) { - const auto &MO = MI->getOperand(OpIdx); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { if (!MO.isReg()) continue; const LLT &Ty = MRI.getType(MO.getReg()); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index f2a470857d21..78c0e90b1384 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -177,8 +177,8 @@ public: // We can't just use EmitIntValue here, as that will emit a data mapping // symbol, and swap the endianness on big-endian systems (instructions are // always little-endian). - for (unsigned I = 0; I < 4; ++I) { - Buffer[I] = uint8_t(Inst); + for (char &C : Buffer) { + C = uint8_t(Inst); Inst >>= 8; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index cf1a60643efd..92552c3d41d5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -101,8 +101,8 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) { // We can't just use EmitIntValue here, as that will swap the // endianness on big-endian systems (instructions are always // little-endian). 
- for (unsigned I = 0; I < 4; ++I) { - Buffer[I] = uint8_t(Inst); + for (char &C : Buffer) { + C = uint8_t(Inst); Inst >>= 8; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index df2f9a0fa3a9..c7c5ff7bcbe7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -26,6 +26,14 @@ def uchar_to_float : GICombineRule< [{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]), (apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>; + +def rcp_sqrt_to_rsq : GICombineRule< + (defs root:$rcp, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_INTRINSIC, G_FSQRT):$rcp, + [{ return PostLegalizerHelper.matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>; + + def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">; def cvt_f32_ubyteN : GICombineRule< @@ -86,7 +94,8 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper< def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper< "AMDGPUGenPostLegalizerCombinerHelper", [all_combines, gfx6gfx7_combines, - uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> { + uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg, + rcp_sqrt_to_rsq]> { let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule"; let StateClass = "AMDGPUPostLegalizerCombinerHelperState"; let AdditionalArguments = []; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index cee56ee97294..8236e6672247 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -654,6 +654,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { SelectMAD_64_32(N); return; } + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + return SelectMUL_LOHI(N); case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); @@ -719,6 +722,18 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { Term->getMetadata("structurizecfg.uniform"); } +bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N, + unsigned ShAmtBits) const { + assert(N->getOpcode() == ISD::AND); + + const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + if (RHS.countTrailingOnes() >= ShAmtBits) + return true; + + const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero; + return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits; +} + static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1) { if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST && @@ -1001,6 +1016,32 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); } +// We need to handle this here because tablegen doesn't support matching +// instructions with multiple outputs. +void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) { + SDLoc SL(N); + bool Signed = N->getOpcode() == ISD::SMUL_LOHI; + unsigned Opc = Signed ? 
AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; + + SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64); + SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); + SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp}; + SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops); + if (!SDValue(N, 0).use_empty()) { + SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32); + SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL, + MVT::i32, SDValue(Mad, 0), Sub0); + ReplaceUses(SDValue(N, 0), SDValue(Lo, 0)); + } + if (!SDValue(N, 1).use_empty()) { + SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32); + SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL, + MVT::i32, SDValue(Mad, 0), Sub1); + ReplaceUses(SDValue(N, 1), SDValue(Hi, 0)); + } + CurDAG->RemoveDeadNode(N); +} + bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const { if (!isUInt<16>(Offset)) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index c1d9673f067e..d638d9877a9b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -136,6 +136,10 @@ private: bool isUniformLoad(const SDNode *N) const; bool isUniformBr(const SDNode *N) const; + // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's + // `ShAmtBits` bits is unneeded. + bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const; + bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, SDValue &RHS) const; @@ -231,6 +235,7 @@ private: void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); void SelectMAD_64_32(SDNode *N); + void SelectMUL_LOHI(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 523fa2d3724b..54177564afbc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -594,6 +594,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SMUL_LOHI); + setTargetDAGCombine(ISD::UMUL_LOHI); setTargetDAGCombine(ISD::MULHU); setTargetDAGCombine(ISD::MULHS); setTargetDAGCombine(ISD::SELECT); @@ -3462,6 +3464,50 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, return DAG.getSExtOrTrunc(Mul, DL, VT); } +SDValue +AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + if (N->getValueType(0) != MVT::i32) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // SimplifyDemandedBits has the annoying habit of turning useful zero_extends + // in the source into any_extends if the result of the mul is truncated. Since + // we can assume the high bits are whatever we want, use the underlying value + // to avoid the unknown high bits from interfering. + if (N0.getOpcode() == ISD::ANY_EXTEND) + N0 = N0.getOperand(0); + if (N1.getOpcode() == ISD::ANY_EXTEND) + N1 = N1.getOperand(0); + + // Try to use two fast 24-bit multiplies (one for each half of the result) + // instead of one slow extending multiply. 
+ unsigned LoOpcode, HiOpcode; + if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { + N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); + LoOpcode = AMDGPUISD::MUL_U24; + HiOpcode = AMDGPUISD::MULHI_U24; + } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { + N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); + LoOpcode = AMDGPUISD::MUL_I24; + HiOpcode = AMDGPUISD::MULHI_I24; + } else { + return SDValue(); + } + + SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1); + SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1); + DCI.CombineTo(N, Lo, Hi); + return SDValue(N, 0); +} + SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); @@ -4103,6 +4149,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, return performTruncateCombine(N, DCI); case ISD::MUL: return performMulCombine(N, DCI); + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + return performMulLoHiCombine(N, DCI); case ISD::MULHS: return performMulhsCombine(N, DCI); case ISD::MULHU: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 03632ac18598..daaca8737c5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -91,6 +91,7 @@ protected: SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 28cb2fc57ac7..e16bead81b65 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3880,6 +3880,22 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0, return KnownBits->signBitIsZero(Base); } +bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI, + unsigned ShAmtBits) const { + assert(MI.getOpcode() == TargetOpcode::G_AND); + + Optional<APInt> RHS = getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI); + if (!RHS) + return false; + + if (RHS->countTrailingOnes() >= ShAmtBits) + return true; + + const APInt &LHSKnownZeros = + KnownBits->getKnownZeroes(MI.getOperand(1).getReg()); + return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffset( MachineOperand &Root) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index b70e6883bae2..26996e42af53 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -300,6 +300,10 @@ private: bool isInlineImmediate64(int64_t Imm) const; bool isInlineImmediate(const APFloat &Imm) const; + // Returns true if TargetOpcode::G_AND MachineInstr `MI`'s masking of the + // shift amount operand's `ShAmtBits` bits is unneeded. 
+ bool isUnneededShiftMask(const MachineInstr &MI, unsigned ShAmtBits) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index bad9f6265b36..0528b552f475 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -242,25 +242,41 @@ def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>; //===----------------------------------------------------------------------===// // Constrained shift PatFrags. + +def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm), + [{ return isUnneededShiftMask(N, 4); }]> { + let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }]; + } + +def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm), + [{ return isUnneededShiftMask(N, 5); }]> { + let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }]; + } + +def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm), + [{ return isUnneededShiftMask(N, 6); }]> { + let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }]; + } + foreach width = [16, 32, 64] in { -defvar mask = !sub(width, 1); +defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width); def cshl_#width : PatFrags<(ops node:$src0, node:$src1), - [(shl node:$src0, node:$src1), (shl node:$src0, (and node:$src1, mask))]>; + [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>; defvar cshl = !cast<SDPatternOperator>("cshl_"#width); def cshl_#width#_oneuse : HasOneUseBinOp<cshl>; def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1), (cshl $src1, $src0)>; def csrl_#width : PatFrags<(ops node:$src0, node:$src1), - [(srl node:$src0, node:$src1), (srl node:$src0, (and node:$src1, mask))]>; + [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>; defvar csrl = !cast<SDPatternOperator>("csrl_"#width); def csrl_#width#_oneuse : HasOneUseBinOp<csrl>; def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1), (csrl $src1, $src0)>; def csra_#width : PatFrags<(ops node:$src0, node:$src1), - [(sra node:$src0, node:$src1), (sra node:$src0, (and node:$src1, mask))]>; + [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>; defvar csra = !cast<SDPatternOperator>("csra_"#width); def csra_#width#_oneuse : HasOneUseBinOp<csra>; def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1), @@ -696,11 +712,6 @@ class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat < (RcpInst $src) >; -class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat < - (AMDGPUrcp (fsqrt vt:$src)), - (RsqInst $src) ->; - // Instructions which select to the same v_min_f* def fminnum_like : PatFrags<(ops node:$src0, node:$src1), [(fminnum_ieee node:$src0, node:$src1), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index fc984d2dda64..1479933a2850 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" @@ -58,6 +59,9 @@ public: bool matchUCharToFloat(MachineInstr &MI); void applyUCharToFloat(MachineInstr &MI); + bool 
matchRcpSqrtToRsq(MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo); + // FIXME: Should be able to have 2 separate matchdatas rather than custom // struct boilerplate. struct CvtF32UByteMatchInfo { @@ -203,6 +207,48 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) { MI.eraseFromParent(); } +bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + + auto getRcpSrc = [=](const MachineInstr &MI) { + MachineInstr *ResMI = nullptr; + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC && + MI.getIntrinsicID() == Intrinsic::amdgcn_rcp) + ResMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + + return ResMI; + }; + + auto getSqrtSrc = [=](const MachineInstr &MI) { + MachineInstr *SqrtSrcMI = nullptr; + mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI))); + return SqrtSrcMI; + }; + + MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr; + // rcp(sqrt(x)) + if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) { + MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) { + B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) + .addUse(SqrtSrcMI->getOperand(0).getReg()) + .setMIFlags(MI.getFlags()); + }; + return true; + } + + // sqrt(rcp(x)) + if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) { + MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) { + B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false) + .addUse(RcpSrcMI->getOperand(0).getReg()) + .setMIFlags(MI.getFlags()); + }; + return true; + } + + return false; +} + bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN( MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) { Register SrcReg = MI.getOperand(1).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index d560d2043f42..7c4eb71882c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -280,10 +280,10 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { } LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str() << '\n'); - for (size_t I = 0; I < Str.size(); ++I) { + for (char C : Str) { // Rest of the C escape sequences (e.g. \') are handled correctly // by the MDParser - switch (Str[I]) { + switch (C) { case '\a': Sizes << "\\a"; break; @@ -308,7 +308,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { Sizes << "\\72"; break; default: - Sizes << Str[I]; + Sizes << C; break; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index d7dc9ee4117b..12b5830ef930 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -45,6 +45,7 @@ public: TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){}; bool isVgprRegBank(Register Reg); + Register getAsVgpr(Register Reg); struct MinMaxMedOpc { unsigned Min, Max, Med; @@ -69,6 +70,23 @@ bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) { return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID; } +Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) { + if (isVgprRegBank(Reg)) + return Reg; + + // Search for existing copy of Reg to vgpr. 
+ for (MachineInstr &Use : MRI.use_instructions(Reg)) { + Register Def = Use.getOperand(0).getReg(); + if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def)) + return Def; + } + + // Copy Reg to vgpr. + Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0); + MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID)); + return VgprReg; +} + AMDGPURegBankCombinerHelper::MinMaxMedOpc AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) { switch (Opc) { @@ -134,7 +152,9 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) { B.setInstrAndDebugLoc(MI); B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, - {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags()); + {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1), + getAsVgpr(MatchInfo.Val2)}, + MI.getFlags()); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ab3ce980c3f6..5988403c0a29 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3189,10 +3189,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const { unsigned RegBank = AMDGPU::InvalidRegBankID; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - if (!MI.getOperand(i).isReg()) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) continue; - Register Reg = MI.getOperand(i).getReg(); + Register Reg = MO.getReg(); if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { RegBank = regBankUnion(RegBank, Bank->getID()); if (RegBank == AMDGPU::VGPRRegBankID) @@ -3206,10 +3206,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI, bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) { - if (!MI.getOperand(i).isReg()) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) continue; - Register Reg = MI.getOperand(i).getReg(); + Register Reg = MO.getReg(); if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { if (Bank->getID() != AMDGPU::SGPRRegBankID) return false; diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 1a9255f3240f..712f6dece911 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -706,9 +706,7 @@ bool AMDGPUCFGStructurizer::prepare() { // Remove unconditional branch instr. // Add dummy exit block iff there are multiple returns. 
- for (SmallVectorImpl<MachineBasicBlock *>::const_iterator - It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { - MachineBasicBlock *MBB = *It; + for (MachineBasicBlock *MBB : OrderedBlks) { removeUnconditionalBranch(MBB); removeRedundantConditionalBranch(MBB); if (isReturnBlock(MBB)) { @@ -851,14 +849,10 @@ bool AMDGPUCFGStructurizer::run() { void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { int SccNum = 0; - MachineBasicBlock *MBB; for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); ++It, ++SccNum) { const std::vector<MachineBasicBlock *> &SccNext = *It; - for (std::vector<MachineBasicBlock *>::const_iterator - blockIter = SccNext.begin(), blockEnd = SccNext.end(); - blockIter != blockEnd; ++blockIter) { - MBB = *blockIter; + for (MachineBasicBlock *MBB : SccNext) { OrderedBlks.push_back(MBB); recordSccnum(MBB, SccNum); } @@ -1601,11 +1595,8 @@ void AMDGPUCFGStructurizer::addDummyExitBlock( FuncRep->push_back(DummyExitBlk); //insert to function insertInstrEnd(DummyExitBlk, R600::RETURN); - for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), - E = RetMBB.end(); It != E; ++It) { - MachineBasicBlock *MBB = *It; - MachineInstr *MI = getReturnInstr(MBB); - if (MI) + for (MachineBasicBlock *MBB : RetMBB) { + if (MachineInstr *MI = getReturnInstr(MBB)) MI->eraseFromParent(); MBB->addSuccessor(DummyExitBlk); LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td index f4ddbf1131c3..d18dab0554bd 100644 --- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td +++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td @@ -48,8 +48,6 @@ def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 -def : RsqPat<RECIPSQRT_IEEE_cm, f32>; - def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>; def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 12224cb3f797..a9a3421e8192 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -126,7 +126,6 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; -def : RsqPat<RECIPSQRT_IEEE_eg, f32>; def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>; def SIN_eg : SIN_Common<0x8D>; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index ff5d0b0af6a4..0f8dd0b3bf58 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1442,12 +1442,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { bool FullReg; const MachineInstr *MI1; - auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1, - this](const MachineInstr &MI) { + auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1, + this](const MachineInstr &MI) { if (!IsMFMAFn(MI)) return false; - if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI)) - return false; Register DstReg = MI.getOperand(0).getReg(); FullReg = (DstReg == Reg); MI1 = &MI; @@ -1458,8 +1456,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) { getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates); WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse); - int NumWaitStates = 
getWaitStatesSinceDef(Reg, IsOverlappedDGEMMorXDLFn, - MaxWaitStates); + int NumWaitStates = + getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates); if (NumWaitStates == std::numeric_limits<int>::max()) continue; @@ -1619,12 +1617,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { const MachineInstr *MFMA = nullptr; unsigned Reg; - auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA, - this](const MachineInstr &MI) { + auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) { if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg)) return false; - if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI)) - return false; MFMA = &MI; return true; }; @@ -1675,8 +1670,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { } MFMA = nullptr; - WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn, - MaxWaitStates); + WaitStatesSinceDef = + getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (!MFMA) continue; @@ -1750,8 +1745,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) { WaitStatesSinceDef); MFMA = nullptr; - WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn, - MaxWaitStates); + WaitStatesSinceDef = + getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates); if (MFMA) { int NeedWaitStates = MaxWaitStates; switch (TSchedModel.computeInstrLatency(MFMA)) { diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 3456f9a6156c..82c09378acac 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg, assert(Reg.isVirtual()); const auto RC = MRI.getRegClass(Reg); auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); - return STI->isSGPRClass(RC) ? - (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) : - STI->hasAGPRs(RC) ? - (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) : - (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE); + return STI->isSGPRClass(RC) + ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) + : STI->isAGPRClass(RC) + ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) + : (STI->getRegSizeInBits(*RC) == 32 ? 
VGPR32 : VGPR_TUPLE); } void GCNRegPressure::inc(unsigned Reg, diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp index 1d93165f9eec..715fd69fc7ae 100644 --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -177,9 +177,7 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); TII = ST.getInstrInfo(); - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator LatestCFAlu = E; while (I != E) { diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index d5eaa33ef964..b9ca7f928d56 100644 --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -301,9 +301,7 @@ public: const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); TII = ST.getInstrInfo(); - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::iterator I = MBB.begin(); if (I != MBB.end() && I->getOpcode() == R600::CF_ALU) continue; // BB was already parsed diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 838a497b4df1..194879fef53c 100644 --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -73,9 +73,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { const R600RegisterInfo &TRI = TII->getRegisterInfo(); - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock &MBB : MF) { MachineBasicBlock::iterator I = MBB.begin(); while (I != MBB.end()) { MachineInstr &MI = *I; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 0215eb9f9fea..bd757e9e3d70 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -285,9 +285,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); - for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) { - NewMI.add(MI.getOperand(i)); - } + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + NewMI.add(MO); } else { return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); } diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index 4487864888b6..b3da2fdefacc 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1265,7 +1265,6 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; - def : RsqPat<RECIPSQRT_IEEE_r600, f32>; def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>; def R600_ExportSwz : ExportSwzInst { diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp index 36acfafa72aa..6aee2f591b56 100644 --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ 
b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -124,11 +124,9 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { DAG->dumpNode(*SU); } else { dbgs() << "NO NODE \n"; - for (unsigned i = 0; i < DAG->SUnits.size(); i++) { - const SUnit &S = DAG->SUnits[i]; + for (const SUnit &S : DAG->SUnits) if (!S.isScheduled) DAG->dumpNode(S); - } }); return SU; diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 1a723279dc9f..72cf48c04e7f 100644 --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -323,14 +323,12 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { TII = ST.getInstrInfo(); MRI = &Fn.getRegInfo(); - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock *MB = &*MBB; + for (MachineBasicBlock &MB : Fn) { PreviousRegSeq.clear(); PreviousRegSeqByReg.clear(); PreviousRegSeqByUndefCount.clear(); - for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); + for (MachineBasicBlock::iterator MII = MB.begin(), MIIE = MB.end(); MII != MIIE; ++MII) { MachineInstr &MI = *MII; if (MI.getOpcode() != R600::REG_SEQUENCE) { diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp index e858bba2983c..beb0aad86e89 100644 --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -343,20 +343,11 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { // dependence between Insn 0 and Insn 2. This can lead to incorrect // packetization // - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock::iterator End = MBB->end(); - MachineBasicBlock::iterator MI = MBB->begin(); - while (MI != End) { - if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || - (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { - MachineBasicBlock::iterator DeleteMI = MI; - ++MI; - MBB->erase(DeleteMI); - End = MBB->end(); - continue; - } - ++MI; + for (MachineBasicBlock &MBB : Fn) { + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF || + (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm())) + MBB.erase(MI); } } diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 777744f08cde..580e4bc417a4 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -18,7 +18,8 @@ namespace llvm { enum SIRCFlags : uint8_t { // For vector registers. 
HasVGPR = 1 << 0, - HasAGPR = 1 << 1 + HasAGPR = 1 << 1, + HasSGPR = 1 << 2 }; // enum SIRCFlags namespace SIInstrFlags { diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index cf93a63f26a0..f54778535b7c 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -127,11 +127,11 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() { static bool hasVectorOperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual()) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; - if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg()))) + if (TRI->hasVectorRegisters(MRI.getRegClass(MO.getReg()))) return true; } return false; @@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, // VGPRz = REG_SEQUENCE VGPRx, sub0 MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg()); - bool IsAGPR = TRI->hasAGPRs(DstRC); + bool IsAGPR = TRI->isAGPRClass(DstRC); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { Register SrcReg = MI.getOperand(I).getReg(); @@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) { Register PHIRes = MI.getOperand(0).getReg(); const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes); - if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) { + if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) { LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI); MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0)); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index a3a0e9c9b9ac..200e00ee5521 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) { unsigned OpIdx = Op - &UseMI->getOperand(0); const MCInstrDesc &InstDesc = UseMI->getDesc(); - const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; - switch (OpInfo.RegClass) { - case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH; - case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH; - case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH; - case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH; - case AMDGPU::AV_160RegClassID: - break; - default: + if (!TRI->isVectorSuperClass( + TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass))) return false; - } const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)); auto Dst = MRI->createVirtualRegister(NewDstRC); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 882b9a203755..4706c74be721 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1364,6 +1364,34 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots( return false; } +bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const { + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + uint64_t EstStackSize = MFI.estimateStackSize(MF); + uint64_t MaxOffset = EstStackSize - 1; + + // We need the emergency stack slots to be allocated in range of the + // 
MUBUF/flat scratch immediate offset from the base register, so assign these + // first at the incoming SP position. + // + // TODO: We could try sorting the objects to find a hole in the first bytes + // rather than allocating as close to possible. This could save a lot of space + // on frames with alignment requirements. + if (ST.enableFlatScratch()) { + const SIInstrInfo *TII = ST.getInstrInfo(); + if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, + SIInstrFlags::FlatScratch)) + return false; + } else { + if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset)) + return false; + } + + return true; +} + MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 951ea79b2809..56fbb875ffd9 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -43,6 +43,9 @@ public: const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const override; + bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const override; + bool isSupportedStackID(TargetStackID::Value ID) const override; void processFunctionBeforeFrameFinalized( diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 519c5b936536..35b72f5d201b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -809,6 +809,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::SMULO, MVT::i64, Custom); setOperationAction(ISD::UMULO, MVT::i64, Custom); + if (Subtarget->hasMad64_32()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + } + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); @@ -919,6 +924,16 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, !hasFP32Denormals(DAG.getMachineFunction()); } +bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, + LLT DestTy, LLT SrcTy) const { + return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) || + (Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) && + DestTy.getScalarSizeInBits() == 32 && + SrcTy.getScalarSizeInBits() == 16 && + // TODO: This probably only requires no input flushing? + !hasFP32Denormals(*MI.getMF()); +} + bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const { // SI has some legal vector types, but no legal vector operations. Say no // shuffles are legal in order to prefer scalarizing some vector operations. 
@@ -4290,8 +4305,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineInstrBuilder MIB; MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) - MIB.add(MI.getOperand(I)); + for (const MachineOperand &MO : MI.operands()) + MIB.add(MO); MIB.cloneMemRefs(MI); MI.eraseFromParent(); @@ -4457,6 +4472,8 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const { return true; } +bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; } + EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const { if (!VT.isVector()) { @@ -4522,6 +4539,34 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, return false; } +bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + LLT Ty) const { + switch (Ty.getScalarSizeInBits()) { + case 16: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f16); + case 32: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f32); + case 64: + return isFMAFasterThanFMulAndFAdd(MF, MVT::f64); + default: + break; + } + + return false; +} + +bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const { + if (!Ty.isScalar()) + return false; + + if (Ty.getScalarSizeInBits() == 16) + return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF()); + if (Ty.getScalarSizeInBits() == 32) + return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF()); + + return false; +} + bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { // TODO: Check future ftz flag @@ -4691,6 +4736,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return lowerXMULO(Op, DAG); + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + return lowerXMUL_LOHI(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } @@ -5304,6 +5352,21 @@ SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({ Result, Overflow }, SL); } +SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { + if (Op->isDivergent()) { + // Select to V_MAD_[IU]64_[IU]32. + return Op; + } + if (Subtarget->hasSMulHi()) { + // Expand to S_MUL_I32 + S_MUL_HI_[IU]32. + return SDValue(); + } + // The multiply is uniform but we would have to use V_MUL_HI_[IU]32 to + // calculate the high part, so we might as well do the whole thing with + // V_MAD_[IU]64_[IU]32. 
+ return Op; +} + SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->isTrapHandlerEnabled() || Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) @@ -9790,10 +9853,9 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF, if (Subtarget->supportsMinMaxDenormModes() || denormalsEnabledForType(MRI.getType(Reg), MF)) return true; - for (unsigned I = 1, E = MI->getNumOperands(); I != E; ++I) { - if (!isCanonicalized(MI->getOperand(I).getReg(), MF, MaxDepth - 1)) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) + if (!isCanonicalized(MO.getReg(), MF, MaxDepth - 1)) return false; - } return true; } default: @@ -11460,15 +11522,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, if (I == -1) break; MachineOperand &Op = MI.getOperand(I); - if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID && - OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) || - !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg())) + if (!Op.isReg() || !Op.getReg().isVirtual()) + continue; + auto *RC = TRI->getRegClassForReg(MRI, Op.getReg()); + if (!TRI->hasAGPRs(RC)) continue; auto *Src = MRI.getUniqueVRegDef(Op.getReg()); if (!Src || !Src->isCopy() || !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg())) continue; - auto *RC = TRI->getRegClassForReg(MRI, Op.getReg()); auto *NewRC = TRI->getEquivalentVGPRClass(RC); // All uses of agpr64 and agpr32 can also accept vgpr except for // v_accvgpr_read, but we do not produce agpr reads during selection, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 1e48c96ad3c8..1315cc15dd02 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -135,6 +135,7 @@ private: SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue getSegmentAperture(unsigned AS, const SDLoc &DL, SelectionDAG &DAG) const; @@ -252,6 +253,9 @@ public: bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const override; + bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, + LLT SrcTy) const override; + bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override; bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, @@ -377,6 +381,7 @@ public: bool hasBitPreservingFPLogic(EVT VT) const override; bool enableAggressiveFMAFusion(EVT VT) const override; + bool enableAggressiveFMAFusion(LLT Ty) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; @@ -384,7 +389,10 @@ public: bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + const LLT Ty) const override; bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override; + bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override; SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index f4e5771d2a2a..c9d9dd1fb82c 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -150,6 +150,8 @@ enum VmemType { VMEM_NOSAMPLER, // MIMG instructions with a sampler. VMEM_SAMPLER, + // BVH instructions + VMEM_BVH }; VmemType getVmemType(const MachineInstr &Inst) { @@ -157,9 +159,10 @@ VmemType getVmemType(const MachineInstr &Inst) { if (!SIInstrInfo::isMIMG(Inst)) return VMEM_NOSAMPLER; const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode()); - return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler - ? VMEM_SAMPLER - : VMEM_NOSAMPLER; + const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + return BaseInfo->BVH ? VMEM_BVH + : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER; } void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4a928123b68f..92f5322b8ad2 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; - if (RI.hasAGPRs(RC)) { + if (RI.isAGPRClass(RC)) { Opcode = (RI.hasVGPRs(SrcRC)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END; - } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) { + } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) { Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64; } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) && (RI.isProperlyAlignedRC(*RC) && @@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB, unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { - if (RI.hasAGPRs(DstRC)) + if (RI.isAGPRClass(DstRC)) return AMDGPU::COPY; if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; @@ -1435,6 +1435,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, FrameInfo.getObjectAlign(FrameIndex)); unsigned SpillSize = TRI->getSpillSize(*RC); + MachineRegisterInfo &MRI = MF->getRegInfo(); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); @@ -1448,7 +1449,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. if (SrcReg.isVirtual() && SpillSize == 4) { - MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass); } @@ -1463,10 +1463,21 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, return; } - unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize) - : getVGPRSpillSaveOpcode(SpillSize); + unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize) + : getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); + if (RI.isVectorSuperClass(RC)) { + // Convert an AV spill into a VGPR spill. Introduce a copy from AV to an + // equivalent VGPR register beforehand. Regalloc might want to introduce + // AV spills only to be relevant until rewriter at which they become + // either spills of VGPRs or AGPRs. 
+ Register TmpVReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC)); + BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpVReg) + .addReg(SrcReg, RegState::Kill); + SrcReg = TmpVReg; + } + BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr @@ -1598,13 +1609,26 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, return; } - unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize) - : getVGPRSpillRestoreOpcode(SpillSize); + unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize) + : getVGPRSpillRestoreOpcode(SpillSize); + + bool IsVectorSuperClass = RI.isVectorSuperClass(RC); + Register TmpReg = DestReg; + if (IsVectorSuperClass) { + // For AV classes, insert the spill restore to a VGPR followed by a copy + // into an equivalent AV register. + MachineRegisterInfo &MRI = MF->getRegInfo(); + DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC)); + } BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addFrameIndex(FrameIndex) // vaddr .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); + + if (IsVectorSuperClass) + BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpReg) + .addReg(DestReg, RegState::Kill); } void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, @@ -2802,12 +2826,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } if (Is16Bit) { - if (isVGPRCopy) - return false; // Do not clobber vgpr_hi16 + if (isVGPRCopy) + return false; // Do not clobber vgpr_hi16 - if (DstReg.isVirtual() && - UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) - return false; + if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16) + return false; UseMI.getOperand(0).setSubReg(0); if (DstReg.isPhysical()) { @@ -3896,9 +3919,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, // verification is broken anyway if (ST.needsAlignedVGPRs()) { const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); - const bool IsVGPR = RI.hasVGPRs(RC); - const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC); - if ((IsVGPR || IsAGPR) && MO.getSubReg()) { + if (RI.hasVectorRegisters(RC) && MO.getSubReg()) { const TargetRegisterClass *SubRC = RI.getSubRegClass(RC, MO.getSubReg()); RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); @@ -5522,13 +5543,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI, if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) { VRC = &AMDGPU::VReg_1RegClass; } else - VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) + VRC = RI.isAGPRClass(getOpRegClass(MI, 0)) ? RI.getEquivalentAGPRClass(SRC) : RI.getEquivalentVGPRClass(SRC); } else { - VRC = RI.hasAGPRs(getOpRegClass(MI, 0)) - ? RI.getEquivalentAGPRClass(VRC) - : RI.getEquivalentVGPRClass(VRC); + VRC = RI.isAGPRClass(getOpRegClass(MI, 0)) + ? 
RI.getEquivalentAGPRClass(VRC) + : RI.getEquivalentVGPRClass(VRC); } RC = VRC; } else { @@ -7065,8 +7086,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( case AMDGPU::STRICT_WWM: case AMDGPU::STRICT_WQM: { const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1); - if (RI.hasAGPRs(SrcRC)) { - if (RI.hasAGPRs(NewDstRC)) + if (RI.isAGPRClass(SrcRC)) { + if (RI.isAGPRClass(NewDstRC)) return nullptr; switch (Inst.getOpcode()) { @@ -7082,7 +7103,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( if (!NewDstRC) return nullptr; } else { - if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) + if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass) return nullptr; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 8c24268e379e..47ee83eb9351 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2246,7 +2246,7 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { let HasExtSDWA9 = 0; } -class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> { +class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { let NeedPatGen = mode; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index d5f9cb8ba493..d55d8da8699a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -827,10 +827,6 @@ def : Pat < let OtherPredicates = [UnsafeFPMath] in { -//defm : RsqPat<V_RSQ_F32_e32, f32>; - -def : RsqPat<V_RSQ_F32_e32, f32>; - // Convert (x - floor(x)) to fract(x) def : GCNPat < (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), @@ -1372,61 +1368,48 @@ def : GCNPat < >; } + /********** ================================ **********/ /********** Floating point absolute/negative **********/ /********** ================================ **********/ -// Prevent expanding both fneg and fabs. -// TODO: Add IgnoredBySelectionDAG bit? 
-let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG - def : GCNPat < - (fneg (fabs (f32 SReg_32:$src))), + (UniformUnaryFrag<fneg> (fabs (f32 SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit >; def : GCNPat < - (fabs (f32 SReg_32:$src)), + (UniformUnaryFrag<fabs> (f32 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff))) >; def : GCNPat < - (fneg (f32 SReg_32:$src)), + (UniformUnaryFrag<fneg> (f32 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) >; def : GCNPat < - (fneg (f16 SReg_32:$src)), + (UniformUnaryFrag<fneg> (f16 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) >; def : GCNPat < - (fneg (f16 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) ->; - -def : GCNPat < - (fabs (f16 SReg_32:$src)), + (UniformUnaryFrag<fabs> (f16 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff))) >; def : GCNPat < - (fneg (fabs (f16 SReg_32:$src))), + (UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit >; def : GCNPat < - (fneg (fabs (f16 VGPR_32:$src))), - (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit ->; - -def : GCNPat < - (fneg (v2f16 SReg_32:$src)), + (UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)), (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) >; def : GCNPat < - (fabs (v2f16 SReg_32:$src)), + (UniformUnaryFrag<fabs> (v2f16 SReg_32:$src)), (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff))) >; @@ -1435,51 +1418,20 @@ def : GCNPat < // fabs is not reported as free because there is modifier for it in // VOP3P instructions, so it is turned into the bit op. def : GCNPat < - (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), + (UniformUnaryFrag<fneg> (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; def : GCNPat < - (fneg (v2f16 (fabs SReg_32:$src))), + (UniformUnaryFrag<fneg> (v2f16 (fabs SReg_32:$src))), (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit >; -// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled - // def : GCNPat < -// (fneg (f64 SReg_64:$src)), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (i32 (S_MOV_B32 (i32 0x80000000)))), -// sub1) -// >; - -// def : GCNPat < -// (fneg (fabs (f64 SReg_64:$src))), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (S_MOV_B32 (i32 0x80000000))), // Set sign bit. -// sub1) -// >; - -// FIXME: Use S_BITSET0_B32/B64? -// def : GCNPat < -// (fabs (f64 SReg_64:$src)), -// (REG_SEQUENCE SReg_64, -// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), -// sub0, -// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), -// (i32 (S_MOV_B32 (i32 0x7fffffff)))), -// sub1) -// >; // COPY_TO_REGCLASS is needed to avoid using SCC from S_XOR_B32 instead // of the real value. 
def : GCNPat < - (fneg (v2f32 SReg_64:$src)), + (UniformUnaryFrag<fneg> (v2f32 SReg_64:$src)), (v2f32 (REG_SEQUENCE SReg_64, (f32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG $src, sub0)), (i32 (S_MOV_B32 (i32 0x80000000)))), @@ -1489,36 +1441,103 @@ def : GCNPat < SReg_32)), sub1)) >; -} // End let AddedComplexity = 1 +def : GCNPat < + (UniformUnaryFrag<fabs> (v2f32 SReg_64:$src)), + (v2f32 (REG_SEQUENCE SReg_64, + (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub0)), + (i32 (S_MOV_B32 (i32 0x7fffffff)))), + SReg_32)), sub0, + (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub1)), + (i32 (S_MOV_B32 (i32 0x7fffffff)))), + SReg_32)), sub1)) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (fabs (v2f32 SReg_64:$src))), + (v2f32 (REG_SEQUENCE SReg_64, + (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub0)), + (i32 (S_MOV_B32 (i32 0x80000000)))), + SReg_32)), sub0, + (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub1)), + (i32 (S_MOV_B32 (i32 0x80000000)))), + SReg_32)), sub1)) +>; + +// FIXME: Use S_BITSET0_B32/B64? +def : GCNPat < + (UniformUnaryFrag<fabs> (f64 SReg_64:$src)), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (S_MOV_B32 (i32 0x7fffffff))), SReg_32)), // Set sign bit. + sub1) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (f64 SReg_64:$src)), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (i32 (S_MOV_B32 (i32 0x80000000)))), SReg_32)), + sub1) +>; + +def : GCNPat < + (UniformUnaryFrag<fneg> (fabs (f64 SReg_64:$src))), + (REG_SEQUENCE SReg_64, + (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)), + sub0, + (i32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)), + (S_MOV_B32 (i32 0x80000000))), SReg_32)),// Set sign bit. 
+ sub1) +>; + + +def : GCNPat < + (fneg (fabs (f32 VGPR_32:$src))), + (V_OR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) // Set sign bit +>; def : GCNPat < (fabs (f32 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src) >; def : GCNPat < (fneg (f32 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) >; def : GCNPat < (fabs (f16 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (f16 VGPR_32:$src)), + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) +>; + +def : GCNPat < + (fneg (fabs (f16 VGPR_32:$src))), + (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit >; def : GCNPat < (fneg (v2f16 VGPR_32:$src)), - (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) + (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) >; def : GCNPat < (fabs (v2f16 VGPR_32:$src)), - (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) + (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src) >; def : GCNPat < (fneg (v2f16 (fabs VGPR_32:$src))), - (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit + (V_OR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) >; def : GCNPat < @@ -1526,30 +1545,28 @@ def : GCNPat < (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit. + (V_AND_B32_e64 (i32 (S_MOV_B32 (i32 0x7fffffff))), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; -// TODO: Use SGPR for constant def : GCNPat < (fneg (f64 VReg_64:$src)), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (i32 (V_MOV_B32_e32 (i32 0x80000000)))), + (V_XOR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; -// TODO: Use SGPR for constant def : GCNPat < (fneg (fabs (f64 VReg_64:$src))), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)), sub0, - (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)), - (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit. + (V_OR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))), + (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))), sub1) >; @@ -1681,14 +1698,9 @@ def : GCNPat < /********** Intrinsic Patterns **********/ /********** ================== **********/ -let OtherPredicates = [isNotGFX90APlus] in -// FIXME: Should use _e64 and select source modifiers. 
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>; - -let OtherPredicates = [isGFX90APlus] in def : GCNPat < - (fpow f32:$src0, f32:$src1), - (V_EXP_F32_e32 (V_MUL_LEGACY_F32_e64 0, f32:$src1, SRCMODS.NONE, (V_LOG_F32_e32 f32:$src0), 0, 0)) + (f32 (fpow (VOP3Mods f32:$src0, i32:$src0_mods), (VOP3Mods f32:$src1, i32:$src1_mods))), + (V_EXP_F32_e64 SRCMODS.NONE, (V_MUL_LEGACY_F32_e64 $src1_mods, $src1, SRCMODS.NONE, (V_LOG_F32_e64 $src0_mods, $src0), 0, 0)) >; def : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 34cbb49dcd16..f4d9002e930e 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI, } unsigned BitWidth = 32 * (CI.Width + Paired.Width); - return TRI->hasAGPRs(getDataRegClass(*CI.I)) + return TRI->isAGPRClass(getDataRegClass(*CI.I)) ? TRI->getAGPRClassForBitWidth(BitWidth) : TRI->getVGPRClassForBitWidth(BitWidth); } diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 29f072ca1e6c..fff4f6729c99 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -795,6 +795,8 @@ bool SIGfx6CacheControl::enableLoadCacheBypass( switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Set L1 cache policy to MISS_EVICT. + // Note: there is no L2 cache bypass policy at the ISA level. Changed |= enableGLCBit(MI); break; case SIAtomicScope::WORKGROUP: @@ -837,8 +839,10 @@ bool SIGfx6CacheControl::enableRMWCacheBypass( assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; - /// The L1 cache is write through so does not need to be bypassed. There is no - /// bypass control for the L2 cache at the isa level. + /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically + /// bypassed, and the GLC bit is instead used to indicate if they are + /// return or no-return. + /// Note: there is no L2 cache coherent bypass control at the ISA level. return Changed; } @@ -860,6 +864,9 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( bool Changed = false; if (IsVolatile) { + // Set L1 cache policy to be MISS_EVICT for load instructions + // and MISS_LRU for store instructions. + // Note: there is no L2 cache bypass policy at the ISA level. if (Op == SIMemOp::LOAD) Changed |= enableGLCBit(MI); @@ -875,7 +882,8 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( } if (IsNonTemporal) { - // Request L1 MISS_EVICT and L2 STREAM for load and store instructions. + // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT + // for both loads and stores, and the L2 cache policy to STREAM. Changed |= enableGLCBit(MI); Changed |= enableSLCBit(MI); return Changed; @@ -1097,6 +1105,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass( switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Set the L1 cache policy to MISS_LRU. + // Note: there is no L2 cache bypass policy at the ISA level. Changed |= enableGLCBit(MI); break; case SIAtomicScope::WORKGROUP: @@ -1206,6 +1216,9 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( bool Changed = false; if (IsVolatile) { + // Set L1 cache policy to be MISS_EVICT for load instructions + // and MISS_LRU for store instructions. + // Note: there is no L2 cache bypass policy at the ISA level. 
if (Op == SIMemOp::LOAD) Changed |= enableGLCBit(MI); @@ -1221,7 +1234,8 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( } if (IsNonTemporal) { - // Request L1 MISS_EVICT and L2 STREAM for load and store instructions. + // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT + // for both loads and stores, and the L2 cache policy to STREAM. Changed |= enableGLCBit(MI); Changed |= enableSLCBit(MI); return Changed; @@ -1380,12 +1394,11 @@ bool SIGfx10CacheControl::enableLoadCacheBypass( bool Changed = false; if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { - /// TODO Do not set glc for rmw atomic operations as they - /// implicitly bypass the L0/L1 caches. - switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Set the L0 and L1 cache policies to MISS_EVICT. + // Note: there is no L2 cache coherent bypass control at the ISA level. Changed |= enableGLCBit(MI); Changed |= enableDLCBit(MI); break; @@ -1434,6 +1447,9 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( bool Changed = false; if (IsVolatile) { + // Set L0 and L1 cache policy to be MISS_EVICT for load instructions + // and MISS_LRU for store instructions. + // Note: there is no L2 cache coherent bypass control at the ISA level. if (Op == SIMemOp::LOAD) { Changed |= enableGLCBit(MI); Changed |= enableDLCBit(MI); @@ -1450,8 +1466,14 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( } if (IsNonTemporal) { - // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions. + // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT + // and L2 cache policy to STREAM. + // For stores setting both GLC and SLC configures L0 and L1 cache policy + // to MISS_EVICT and the L2 cache policy to STREAM. + if (Op == SIMemOp::STORE) + Changed |= enableGLCBit(MI); Changed |= enableSLCBit(MI); + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 6a698348d389..da41a5e2478a 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, unsigned I = MI.getOperandNo(&Op); if (Desc.OpInfo[I].RegClass == -1 || - !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) + !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass))) continue; if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() && diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index d1b8e217471e..b0e45dd3e3e3 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -291,20 +291,19 @@ bool SIPreEmitPeephole::mustRetainExeczBranch( MBBI != End && MBBI != ToI; ++MBBI) { const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { + for (const MachineInstr &MI : MBB) { // When a uniform loop is inside non-uniform control flow, the branch // leaving the loop might never be taken when EXEC = 0. // Hence we should retain cbranch out of the loop lest it become infinite. - if (I->isConditionalBranch()) + if (MI.isConditionalBranch()) return true; - if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) + if (TII->hasUnwantedEffectsWhenEXECEmpty(MI)) return true; // These instructions are potentially expensive even if EXEC = 0. 
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || - TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT) + if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) || + TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT) return true; ++NumInstr; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index bfbe84f696f8..a1d9a23a5084 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -402,6 +402,62 @@ const uint32_t *SIRegisterInfo::getNoPreservedMask() const { return CSR_AMDGPU_NoRegs_RegMask; } +const TargetRegisterClass * +SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const { + // FIXME: Should have a helper function like getEquivalentVGPRClass to get the + // equivalent AV class. If used one, the verifier will crash after + // RegBankSelect in the GISel flow. The aligned regclasses are not fully given + // until Instruction selection. + if (MF.getSubtarget<GCNSubtarget>().hasMAIInsts() && + (isVGPRClass(RC) || isAGPRClass(RC))) { + if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass) + return &AMDGPU::AV_32RegClass; + if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass) + return &AMDGPU::AV_64RegClass; + if (RC == &AMDGPU::VReg_64_Align2RegClass || + RC == &AMDGPU::AReg_64_Align2RegClass) + return &AMDGPU::AV_64_Align2RegClass; + if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass) + return &AMDGPU::AV_96RegClass; + if (RC == &AMDGPU::VReg_96_Align2RegClass || + RC == &AMDGPU::AReg_96_Align2RegClass) + return &AMDGPU::AV_96_Align2RegClass; + if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass) + return &AMDGPU::AV_128RegClass; + if (RC == &AMDGPU::VReg_128_Align2RegClass || + RC == &AMDGPU::AReg_128_Align2RegClass) + return &AMDGPU::AV_128_Align2RegClass; + if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass) + return &AMDGPU::AV_160RegClass; + if (RC == &AMDGPU::VReg_160_Align2RegClass || + RC == &AMDGPU::AReg_160_Align2RegClass) + return &AMDGPU::AV_160_Align2RegClass; + if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass) + return &AMDGPU::AV_192RegClass; + if (RC == &AMDGPU::VReg_192_Align2RegClass || + RC == &AMDGPU::AReg_192_Align2RegClass) + return &AMDGPU::AV_192_Align2RegClass; + if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass) + return &AMDGPU::AV_256RegClass; + if (RC == &AMDGPU::VReg_256_Align2RegClass || + RC == &AMDGPU::AReg_256_Align2RegClass) + return &AMDGPU::AV_256_Align2RegClass; + if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass) + return &AMDGPU::AV_512RegClass; + if (RC == &AMDGPU::VReg_512_Align2RegClass || + RC == &AMDGPU::AReg_512_Align2RegClass) + return &AMDGPU::AV_512_Align2RegClass; + if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass) + return &AMDGPU::AV_1024RegClass; + if (RC == &AMDGPU::VReg_1024_Align2RegClass || + RC == &AMDGPU::AReg_1024_Align2RegClass) + return &AMDGPU::AV_1024_Align2RegClass; + } + + return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF); +} + Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SIFrameLowering *TFI = MF.getSubtarget<GCNSubtarget>().getFrameLowering(); @@ -994,10 +1050,22 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, unsigned Dst = IsStore ? Reg : ValueReg; unsigned Src = IsStore ? 
ValueReg : Reg; - unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 - : AMDGPU::V_ACCVGPR_READ_B32_e64; + bool IsVGPR = TRI->isVGPR(MRI, Reg); + DebugLoc DL = MI->getDebugLoc(); + if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) { + // Spiller during regalloc may restore a spilled register to its superclass. + // It could result in AGPR spills restored to VGPRs or the other way around, + // making the src and dst with identical regclasses at this point. It just + // needs a copy in such cases. + auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst) + .addReg(Src, getKillRegState(IsKill)); + CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); + return CopyMIB; + } + unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 + : AMDGPU::V_ACCVGPR_READ_B32_e64; - auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst) + auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst) .addReg(Src, getKillRegState(IsKill)); MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse); return MIB; @@ -1099,7 +1167,7 @@ void SIRegisterInfo::buildSpillLoadStore( const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores. - const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC); + const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC); const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8; // Always use 4 byte operations for AGPRs because we need to scavenge @@ -2163,6 +2231,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const { : getAnyAGPRClassForBitWidth(BitWidth); } +static const TargetRegisterClass * +getAnyVectorSuperClassForBitWidth(unsigned BitWidth) { + if (BitWidth <= 64) + return &AMDGPU::AV_64RegClass; + if (BitWidth <= 96) + return &AMDGPU::AV_96RegClass; + if (BitWidth <= 128) + return &AMDGPU::AV_128RegClass; + if (BitWidth <= 160) + return &AMDGPU::AV_160RegClass; + if (BitWidth <= 192) + return &AMDGPU::AV_192RegClass; + if (BitWidth <= 224) + return &AMDGPU::AV_224RegClass; + if (BitWidth <= 256) + return &AMDGPU::AV_256RegClass; + if (BitWidth <= 512) + return &AMDGPU::AV_512RegClass; + if (BitWidth <= 1024) + return &AMDGPU::AV_1024RegClass; + + return nullptr; +} + +static const TargetRegisterClass * +getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) { + if (BitWidth <= 64) + return &AMDGPU::AV_64_Align2RegClass; + if (BitWidth <= 96) + return &AMDGPU::AV_96_Align2RegClass; + if (BitWidth <= 128) + return &AMDGPU::AV_128_Align2RegClass; + if (BitWidth <= 160) + return &AMDGPU::AV_160_Align2RegClass; + if (BitWidth <= 192) + return &AMDGPU::AV_192_Align2RegClass; + if (BitWidth <= 224) + return &AMDGPU::AV_224_Align2RegClass; + if (BitWidth <= 256) + return &AMDGPU::AV_256_Align2RegClass; + if (BitWidth <= 512) + return &AMDGPU::AV_512_Align2RegClass; + if (BitWidth <= 1024) + return &AMDGPU::AV_1024_Align2RegClass; + + return nullptr; +} + +const TargetRegisterClass * +SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const { + if (BitWidth <= 16) + return &AMDGPU::VGPR_LO16RegClass; + if (BitWidth <= 32) + return &AMDGPU::AV_32RegClass; + return ST.needsAlignedVGPRs() + ? 
getAlignedVectorSuperClassForBitWidth(BitWidth) + : getAnyVectorSuperClassForBitWidth(BitWidth); +} + const TargetRegisterClass * SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) { if (BitWidth <= 16) @@ -2305,15 +2432,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( // We can assume that each lane corresponds to one 32-bit register. unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32; - if (isSGPRClass(RC)) { - if (Size == 32) - RC = &AMDGPU::SGPR_32RegClass; - else - RC = getSGPRClassForBitWidth(Size); - } else if (hasAGPRs(RC)) { + if (isAGPRClass(RC)) { RC = getAGPRClassForBitWidth(Size); - } else { + } else if (isVGPRClass(RC)) { RC = getVGPRClassForBitWidth(Size); + } else if (isVectorSuperClass(RC)) { + RC = getVectorSuperClassForBitWidth(Size); + } else { + RC = getSGPRClassForBitWidth(Size); } assert(RC && "Invalid sub-register class size"); return RC; @@ -2626,10 +2752,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const { if (!ST.needsAlignedVGPRs()) return true; - if (hasVGPRs(&RC)) + if (isVGPRClass(&RC)) return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC))); - if (hasAGPRs(&RC)) + if (isAGPRClass(&RC)) return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC))); + if (isVectorSuperClass(&RC)) + return RC.hasSuperClassEq( + getVectorSuperClassForBitWidth(getRegSizeInBits(RC))); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 8d90ddb1cf4c..f1fe0a1d9329 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -77,6 +77,10 @@ public: return 100; } + const TargetRegisterClass * + getLargestLegalSuperClass(const TargetRegisterClass *RC, + const MachineFunction &MF) const override; + Register getFrameRegister(const MachineFunction &MF) const override; bool hasBasePointer(const MachineFunction &MF) const; @@ -156,6 +160,10 @@ public: const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const; LLVM_READONLY + const TargetRegisterClass * + getVectorSuperClassForBitWidth(unsigned BitWidth) const; + + LLVM_READONLY static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth); /// Return the 'base' register class for this register. @@ -164,7 +172,7 @@ public: /// \returns true if this class contains only SGPR registers static bool isSGPRClass(const TargetRegisterClass *RC) { - return !hasVGPRs(RC) && !hasAGPRs(RC); + return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC); } /// \returns true if this class ID contains only SGPR registers @@ -176,12 +184,22 @@ public: /// \returns true if this class contains only VGPR registers static bool isVGPRClass(const TargetRegisterClass *RC) { - return hasVGPRs(RC) && !hasAGPRs(RC); + return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC); } /// \returns true if this class contains only AGPR registers static bool isAGPRClass(const TargetRegisterClass *RC) { - return hasAGPRs(RC) && !hasVGPRs(RC); + return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC); + } + + /// \returns true only if this class contains both VGPR and AGPR registers + bool isVectorSuperClass(const TargetRegisterClass *RC) const { + return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC); + } + + /// \returns true only if this class contains both VGPR and SGPR registers + bool isVSSuperClass(const TargetRegisterClass *RC) const { + return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC); } /// \returns true if this class contains VGPR registers. 
@@ -194,6 +212,11 @@ public: return RC->TSFlags & SIRCFlags::HasAGPR; } + /// \returns true if this class contains SGPR registers. + static bool hasSGPRs(const TargetRegisterClass *RC) { + return RC->TSFlags & SIRCFlags::HasSGPR; + } + /// \returns true if this class contains any vector registers. static bool hasVectorRegisters(const TargetRegisterClass *RC) { return hasVGPRs(RC) || hasAGPRs(RC); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index cf1d90484228..340e2b48e5cd 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -133,9 +133,13 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList> field bit HasVGPR = 0; field bit HasAGPR = 0; + // For scalar register classes. + field bit HasSGPR = 0; + // These need to be kept in sync with the enum SIRCFlags. let TSFlags{0} = HasVGPR; let TSFlags{1} = HasAGPR; + let TSFlags{2} = HasSGPR; } multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1, @@ -307,45 +311,51 @@ foreach Index = 0...255 in { // Groupings using register classes and tuples //===----------------------------------------------------------------------===// -def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> { +def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> { let CopyCost = -1; let isAllocatable = 0; + let HasSGPR = 1; } -def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { +def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> { let CopyCost = 1; let isAllocatable = 0; + let HasSGPR = 1; } -def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { +def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> { let CopyCost = 1; let Size = 16; let isAllocatable = 0; + let HasSGPR = 1; } // TODO: Do we need to set DwarfRegAlias on register tuples? -def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "SGPR%u_LO16", 0, 105))> { let AllocationPriority = 9; let Size = 16; let GeneratePressureSet = 0; + let HasSGPR = 1; } -def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "SGPR%u_HI16", 0, 105))> { let isAllocatable = 0; let Size = 16; let GeneratePressureSet = 0; + let HasSGPR = 1; } // SGPR 32-bit registers -def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "SGPR%u", 0, 105))> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. 
let AllocationPriority = 9; let GeneratePressureSet = 0; + let HasSGPR = 1; } // SGPR 64-bit registers @@ -376,16 +386,18 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s" def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">; // Trap handler TMP 32-bit registers -def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, +def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, (add (sequence "TTMP%u", 0, 15))> { let isAllocatable = 0; + let HasSGPR = 1; } // Trap handler TMP 16-bit registers -def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add (sequence "TTMP%u_LO16", 0, 15))> { let Size = 16; let isAllocatable = 0; + let HasSGPR = 1; } // Trap handler TMP 64-bit registers @@ -598,16 +610,18 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">; // Register classes used as source and destination //===----------------------------------------------------------------------===// -def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add FP_REG, SP_REG)> { let isAllocatable = 0; let CopyCost = -1; + let HasSGPR = 1; } -def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; + let HasSGPR = 1; } def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32, @@ -616,10 +630,10 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32, let CopyCost = -1; } -let GeneratePressureSet = 0 in { +let GeneratePressureSet = 0, HasSGPR = 1 in { // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. 
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, @@ -627,7 +641,7 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 let AllocationPriority = 10; } -def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16, XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16, @@ -637,29 +651,29 @@ def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16, let AllocationPriority = 10; } -def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { let AllocationPriority = 10; } -def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XEXEC_HI : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> { let Size = 16; let AllocationPriority = 10; } -def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { let AllocationPriority = 10; } -def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16_XM0 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> { let Size = 16; let AllocationPriority = 10; } -def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, +def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> { let Size = 16; let AllocationPriority = 10; @@ -667,65 +681,75 @@ def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, } // End GeneratePressureSet = 0 // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, +def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { let AllocationPriority = 10; + let HasSGPR = 1; } let GeneratePressureSet = 0 in { -def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, +def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; + let HasSGPR = 1; } -def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 11; + let HasSGPR = 1; } // CCR (call clobbered registers) SGPR 64-bit registers -def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, +def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 16))> { let CopyCost = SGPR_64.CopyCost; let AllocationPriority = 
SGPR_64.AllocationPriority; + let HasSGPR = 1; } // Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC -def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, +def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc (shl SGPR_64, 15), 1), // s[30:31] (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63] let CopyCost = SGPR_64.CopyCost; let AllocationPriority = SGPR_64.AllocationPriority; + let HasSGPR = 1; } -def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, +def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> { let isAllocatable = 0; + let HasSGPR = 1; } -def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 13; + let HasSGPR = 1; } -def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, +def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, (add SReg_64_XEXEC, EXEC)> { let CopyCost = 1; let AllocationPriority = 13; + let HasSGPR = 1; } -def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32, +def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32, (add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> { let CopyCost = 1; let isAllocatable = 0; + let HasSGPR = 1; } -def SReg_1 : RegisterClass<"AMDGPU", [i1], 32, +def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add SReg_1_XEXEC, EXEC, EXEC_LO)> { let CopyCost = 1; let isAllocatable = 0; + let HasSGPR = 1; } multiclass SRegClass<int numRegs, int priority, @@ -738,18 +762,18 @@ multiclass SRegClass<int numRegs, int priority, defvar sgprName = !strconcat("SGPR_", suffix); defvar ttmpName = !strconcat("TTMP_", suffix); - let AllocationPriority = priority, CopyCost = copyCost in { - def "" # sgprName : RegisterClass<"AMDGPU", regTypes, 32, (add regList)> { + let AllocationPriority = priority, CopyCost = copyCost, HasSGPR = 1 in { + def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> { } if hasTTMP then { - def "" # ttmpName : RegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> { + def "" # ttmpName : SIRegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> { let isAllocatable = 0; } } def SReg_ # suffix : - RegisterClass<"AMDGPU", regTypes, 32, + SIRegisterClass<"AMDGPU", regTypes, 32, !con(!dag(add, [!cast<RegisterClass>(sgprName)], ["sgpr"]), !if(hasTTMP, !dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]), @@ -855,44 +879,45 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { let isAllocatable = 0; let HasVGPR = 1; + let HasSGPR = 1; } def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> { let isAllocatable = 0; let HasVGPR = 1; + let HasSGPR = 1; } -def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, - (add AGPR_32, VGPR_32)> { - let isAllocatable = 0; - let HasVGPR = 1; - let HasAGPR = 1; -} - -def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, - (add AReg_64, VReg_64)> { - let isAllocatable = 0; +def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> { let HasVGPR = 1; let HasAGPR = 1; } } // End GeneratePressureSet = 0 -let HasVGPR = 1, HasAGPR = 1 in { -def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32, - (add AReg_96, VReg_96)> { - let isAllocatable = 0; -} +// Define 
a register tuple class, along with one requiring an even +// aligned base register. +multiclass AVRegClass<int numRegs, list<ValueType> regTypes, + dag vregList, dag aregList> { + let HasVGPR = 1, HasAGPR = 1 in { + // Define the regular class. + def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>; -def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32, - (add AReg_128, VReg_128)> { - let isAllocatable = 0; + // Define 2-aligned variant + def _Align2 : VRegClassBase<numRegs, regTypes, + (add (decimate vregList, 2), + (decimate aregList, 2))>; + } } -def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32, - (add AReg_160, VReg_160)> { - let isAllocatable = 0; -} -} // End HasVGPR = 1, HasAGPR = 1 +defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>; +defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>; +defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>; +defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>; +defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>; +defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>; +defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>; +defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>; +defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>; //===----------------------------------------------------------------------===// // Register operands diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 0792b303b830..18d424a3bc9f 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -93,16 +93,16 @@ def HWBranch : ProcResource<1> { let BufferSize = 1; } def HWExport : ProcResource<1> { - let BufferSize = 7; // Taken from S_WAITCNT + let BufferSize = 1; } def HWLGKM : ProcResource<1> { - let BufferSize = 31; // Taken from S_WAITCNT + let BufferSize = 1; } def HWSALU : ProcResource<1> { let BufferSize = 1; } def HWVMEM : ProcResource<1> { - let BufferSize = 15; // Taken from S_WAITCNT + let BufferSize = 1; } def HWVALU : ProcResource<1> { let BufferSize = 1; diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 6f63f686635a..46012e5d7d97 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -487,6 +487,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs"); SmallVector<MachineInstr *, 4> SetInactiveInstrs; SmallVector<MachineInstr *, 4> SoftWQMInstrs; + bool HasImplicitDerivatives = + MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS; // We need to visit the basic blocks in reverse post-order so that we visit // defs before uses, in particular so that we don't accidentally mark an @@ -497,8 +499,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, MachineBasicBlock &MBB = **BI; BlockInfo &BBI = Blocks[&MBB]; - for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) { - MachineInstr &MI = *II; + for (MachineInstr &MI : MBB) { InstrInfo &III = Instructions[&MI]; unsigned Opcode = MI.getOpcode(); char Flags = 0; @@ -507,6 +508,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, // If LOD is not supported WQM is not needed. 
if (!ST->hasExtendedImageInsts()) continue; + // Only generate implicit WQM if implicit derivatives are required. + // This avoids inserting unintended WQM if a shader type without + // implicit derivatives uses an image sampling instruction. + if (!HasImplicitDerivatives) + continue; // Sampling instructions don't need to produce results for all pixels // in a quad, they just require all inputs of a quad to have been // computed for derivatives. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 9da7b9f5145d..d20eaaaa65e8 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) { return 32; case AMDGPU::SGPR_64RegClassID: case AMDGPU::VS_64RegClassID: - case AMDGPU::AV_64RegClassID: case AMDGPU::SReg_64RegClassID: case AMDGPU::VReg_64RegClassID: case AMDGPU::AReg_64RegClassID: case AMDGPU::SReg_64_XEXECRegClassID: case AMDGPU::VReg_64_Align2RegClassID: case AMDGPU::AReg_64_Align2RegClassID: + case AMDGPU::AV_64RegClassID: + case AMDGPU::AV_64_Align2RegClassID: return 64; case AMDGPU::SGPR_96RegClassID: case AMDGPU::SReg_96RegClassID: @@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::VReg_96_Align2RegClassID: case AMDGPU::AReg_96_Align2RegClassID: case AMDGPU::AV_96RegClassID: + case AMDGPU::AV_96_Align2RegClassID: return 96; case AMDGPU::SGPR_128RegClassID: case AMDGPU::SReg_128RegClassID: @@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::VReg_128_Align2RegClassID: case AMDGPU::AReg_128_Align2RegClassID: case AMDGPU::AV_128RegClassID: + case AMDGPU::AV_128_Align2RegClassID: return 128; case AMDGPU::SGPR_160RegClassID: case AMDGPU::SReg_160RegClassID: @@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::VReg_160_Align2RegClassID: case AMDGPU::AReg_160_Align2RegClassID: case AMDGPU::AV_160RegClassID: + case AMDGPU::AV_160_Align2RegClassID: return 160; case AMDGPU::SGPR_192RegClassID: case AMDGPU::SReg_192RegClassID: @@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_192RegClassID: case AMDGPU::VReg_192_Align2RegClassID: case AMDGPU::AReg_192_Align2RegClassID: + case AMDGPU::AV_192RegClassID: + case AMDGPU::AV_192_Align2RegClassID: return 192; case AMDGPU::SGPR_224RegClassID: case AMDGPU::SReg_224RegClassID: @@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_224RegClassID: case AMDGPU::VReg_224_Align2RegClassID: case AMDGPU::AReg_224_Align2RegClassID: + case AMDGPU::AV_224RegClassID: + case AMDGPU::AV_224_Align2RegClassID: return 224; case AMDGPU::SGPR_256RegClassID: case AMDGPU::SReg_256RegClassID: @@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_256RegClassID: case AMDGPU::VReg_256_Align2RegClassID: case AMDGPU::AReg_256_Align2RegClassID: + case AMDGPU::AV_256RegClassID: + case AMDGPU::AV_256_Align2RegClassID: return 256; case AMDGPU::SGPR_512RegClassID: case AMDGPU::SReg_512RegClassID: @@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_512RegClassID: case AMDGPU::VReg_512_Align2RegClassID: case AMDGPU::AReg_512_Align2RegClassID: + case AMDGPU::AV_512RegClassID: + case AMDGPU::AV_512_Align2RegClassID: return 512; case AMDGPU::SGPR_1024RegClassID: case AMDGPU::SReg_1024RegClassID: @@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) { case AMDGPU::AReg_1024RegClassID: case 
AMDGPU::VReg_1024_Align2RegClassID: case AMDGPU::AReg_1024_Align2RegClassID: + case AMDGPU::AV_1024RegClassID: + case AMDGPU::AV_1024_Align2RegClassID: return 1024; default: llvm_unreachable("Unexpected register class"); diff --git a/llvm/lib/Target/ARC/ARCMCInstLower.cpp b/llvm/lib/Target/ARC/ARCMCInstLower.cpp index 62462b77eccf..50ba9fe75232 100644 --- a/llvm/lib/Target/ARC/ARCMCInstLower.cpp +++ b/llvm/lib/Target/ARC/ARCMCInstLower.cpp @@ -104,8 +104,7 @@ MCOperand ARCMCInstLower::LowerOperand(const MachineOperand &MO, void ARCMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp = LowerOperand(MO); if (MCOp.isValid()) diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h index 5500783f74db..1d5e45aec06c 100644 --- a/llvm/lib/Target/ARM/ARM.h +++ b/llvm/lib/Target/ARM/ARM.h @@ -44,6 +44,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMBranchTargetsPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); @@ -66,6 +67,7 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, void initializeARMParallelDSPPass(PassRegistry &); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); +void initializeARMBranchTargetsPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeThumb2SizeReducePass(PassRegistry &); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 8cbd80f1bf65..e03dd597eb65 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -442,6 +442,10 @@ def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465", "Mitigate against the cve-2021-35465 " "security vulnurability">; +def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true", + "Enable Pointer Authentication and Branch " + "Target Identification">; + //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 9901b86b0e87..6a88ac485e69 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -763,6 +763,32 @@ void ARMAsmPrinter::emitAttributes() { int EnumBuildAttr = EnumWidth == 1 ? 1 : 2; ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); } + + auto *PACValue = mdconst::extract_or_null<ConstantInt>( + SourceModule->getModuleFlag("sign-return-address")); + if (PACValue && PACValue->getZExtValue() == 1) { + // If "+pacbti" is used as an architecture extension, + // Tag_PAC_extension is emitted in + // ARMTargetStreamer::emitTargetAttributes(). 
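      // Note (not from the patch): judging by the ARMBuildAttrs names used just
      // below, AllowPACInNOPSpace / AllowBTIInNOPSpace record that only the
      // hint-space (NOP-space) PAC/BTI encodings are emitted, so the resulting
      // objects should still execute, treating them as NOPs, on cores without
      // the PACBTI extension.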
+ if (!STI.hasPACBTI()) { + ATS.emitAttribute(ARMBuildAttrs::PAC_extension, + ARMBuildAttrs::AllowPACInNOPSpace); + } + ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed); + } + + auto *BTIValue = mdconst::extract_or_null<ConstantInt>( + SourceModule->getModuleFlag("branch-target-enforcement")); + if (BTIValue && BTIValue->getZExtValue() == 1) { + // If "+pacbti" is used as an architecture extension, + // Tag_BTI_extension is emitted in + // ARMTargetStreamer::emitTargetAttributes(). + if (!STI.hasPACBTI()) { + ATS.emitAttribute(ARMBuildAttrs::BTI_extension, + ARMBuildAttrs::AllowBTIInNOPSpace); + } + ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed); + } } } @@ -1535,17 +1561,17 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { MCInst.addExpr(BranchTarget); } - if (Opc == ARM::t2BFic) { - const MCExpr *ElseLabel = MCSymbolRefExpr::create( - getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getIndex(), OutContext), - OutContext); - MCInst.addExpr(ElseLabel); - MCInst.addImm(MI->getOperand(3).getImm()); - } else { - MCInst.addImm(MI->getOperand(2).getImm()) - .addReg(MI->getOperand(3).getReg()); - } + if (Opc == ARM::t2BFic) { + const MCExpr *ElseLabel = MCSymbolRefExpr::create( + getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + MI->getOperand(2).getIndex(), OutContext), + OutContext); + MCInst.addExpr(ElseLabel); + MCInst.addImm(MI->getOperand(3).getImm()); + } else { + MCInst.addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg()); + } EmitToStreamer(*OutStreamer, MCInst); return; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2d981be4cfc1..2a12947d24a8 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -310,8 +310,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, // Transfer LiveVariables states, kill / dead info. 
if (LV) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) { Register Reg = MO.getReg(); @@ -634,8 +633,7 @@ bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred, bool SkipDead) const { bool Found = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR); bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; if (ClobbersCPSR || IsCPSR) { @@ -732,8 +730,7 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { namespace llvm { template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || MO.isUndef() || MO.isUse()) continue; if (MO.getReg() != ARM::CPSR) @@ -1860,15 +1857,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI) const { unsigned Opcode = MI0.getOpcode(); - if (Opcode == ARM::t2LDRpci || - Opcode == ARM::t2LDRpci_pic || - Opcode == ARM::tLDRpci || - Opcode == ARM::tLDRpci_pic || - Opcode == ARM::LDRLIT_ga_pcrel || - Opcode == ARM::LDRLIT_ga_pcrel_ldr || - Opcode == ARM::tLDRLIT_ga_pcrel || - Opcode == ARM::MOV_ga_pcrel || - Opcode == ARM::MOV_ga_pcrel_ldr || + if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || + Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || + Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || Opcode == ARM::t2MOV_ga_pcrel) { if (MI1.getOpcode() != Opcode) return false; @@ -1880,11 +1873,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::LDRLIT_ga_pcrel || - Opcode == ARM::LDRLIT_ga_pcrel_ldr || - Opcode == ARM::tLDRLIT_ga_pcrel || - Opcode == ARM::MOV_ga_pcrel || - Opcode == ARM::MOV_ga_pcrel_ldr || + if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); @@ -2312,8 +2303,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. - for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) { // Reject frame index operands, PEI can't handle the predicated pseudos. 
if (MO.isFI() || MO.isCPI() || MO.isJTI()) return nullptr; @@ -4857,11 +4847,10 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, if (MI.getOpcode() == ARM::tPUSH || MI.getOpcode() == ARM::tPOP || MI.getOpcode() == ARM::tPOP_RET) { - for (int i = 2, e = MI.getNumOperands(); i < e; ++i) { - if (MI.getOperand(i).isImplicit() || - !MI.getOperand(i).isReg()) + for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) { + if (MO.isImplicit() || !MO.isReg()) continue; - Register Reg = MI.getOperand(i).getReg(); + Register Reg = MO.getReg(); if (Reg < ARM::R0 || Reg > ARM::R7) { if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { @@ -5748,17 +5737,17 @@ enum MachineOutlinerMBBFlags { }; struct OutlinerCosts { - const int CallTailCall; - const int FrameTailCall; - const int CallThunk; - const int FrameThunk; - const int CallNoLRSave; - const int FrameNoLRSave; - const int CallRegSave; - const int FrameRegSave; - const int CallDefault; - const int FrameDefault; - const int SaveRestoreLROnStack; + int CallTailCall; + int FrameTailCall; + int CallThunk; + int FrameThunk; + int CallNoLRSave; + int FrameNoLRSave; + int CallRegSave; + int FrameRegSave; + int CallDefault; + int FrameDefault; + int SaveRestoreLROnStack; OutlinerCosts(const ARMSubtarget &target) : CallTailCall(target.isThumb() ? 4 : 4), @@ -5879,6 +5868,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( return outliner::OutlinedFunction(); } + // Partition the candidates in two sets: one with BTI enabled and one with BTI + // disabled. Remove the candidates from the smaller set. We expect the + // majority of the candidates to be in consensus with regard to branch target + // enforcement with just a few oddballs, but if they are the same number + // prefer the non-BTI ones for outlining, since they have less overhead. + auto NoBTI = + llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { + const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); + return AFI.branchTargetEnforcement(); + }); + if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) > + std::distance(NoBTI, RepeatedSequenceLocs.end())) + RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end()); + else + RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI); + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + // At this point, we have only "safe" candidates to outline. Figure out // frame + call instruction information. @@ -5892,6 +5899,16 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( }; OutlinerCosts Costs(Subtarget); + const auto &SomeMFI = + *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>(); + // Adjust costs to account for the BTI instructions. + if (SomeMFI.branchTargetEnforcement()) { + Costs.FrameDefault += 4; + Costs.FrameNoLRSave += 4; + Costs.FrameRegSave += 4; + Costs.FrameTailCall += 4; + Costs.FrameThunk += 4; + } unsigned FrameID = MachineOutlinerDefault; unsigned NumBytesToCreateFrame = Costs.FrameDefault; @@ -6004,16 +6021,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, // Stack might be involved but addressing mode doesn't handle any offset. 
// Rq: AddrModeT1_[1|2|4] don't operate on SP - if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions - || AddrMode == ARMII::AddrMode4 // Load/Store Multiple - || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple - || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register - || AddrMode == ARMII::AddrModeT2_pc // PCrel access - || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST - || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE - || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE - || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR - || AddrMode == ARMII::AddrModeNone) + if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions + AddrMode == ARMII::AddrMode4 || // Load/Store Multiple + AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple + AddrMode == ARMII::AddrModeT2_so || // SP can't be used as based register + AddrMode == ARMII::AddrModeT2_pc || // PCrel access + AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST + AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE + AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE + AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR + AddrMode == ARMII::AddrModeNone || + AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions + AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm return false; unsigned NumOps = MI->getDesc().getNumOperands(); @@ -6051,7 +6070,7 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, NumBits = 8; Scale = 2; break; - case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i8pos: NumBits = 8; break; case ARMII::AddrModeT2_i8s4: @@ -6089,7 +6108,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, } return false; +} + +void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const { + outliner::Candidate &C = Candidates.front(); + // branch-target-enforcement is guaranteed to be consistent between all + // candidates, so we only need to look at one. 
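  // (Clarifying note, not in the patch: the consistency holds because
  // getOutliningCandidateInfo above partitions the candidates with
  // llvm::partition on branchTargetEnforcement() and erases whichever side is
  // smaller, keeping the non-BTI side on a tie, so every candidate that
  // reaches this point agrees on the attribute.)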
+ const Function &CFn = C.getMF()->getFunction(); + if (CFn.hasFnAttribute("branch-target-enforcement")) + F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement")); + ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); } bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index db9320962e81..5fa912ae35d7 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -349,6 +349,8 @@ public: bool OutlineFromLinkOnceODRs) const override; outliner::OutlinedFunction getOutliningCandidateInfo( std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; + void mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const override; outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, @@ -877,19 +879,23 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm, unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); switch (AddrMode) { case ARMII::AddrModeT2_i7: - return std::abs(Imm) < (((1 << 7) * 1) - 1); + return std::abs(Imm) < ((1 << 7) * 1); case ARMII::AddrModeT2_i7s2: - return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; + return std::abs(Imm) < ((1 << 7) * 2) && Imm % 2 == 0; case ARMII::AddrModeT2_i7s4: - return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; + return std::abs(Imm) < ((1 << 7) * 4) && Imm % 4 == 0; case ARMII::AddrModeT2_i8: - return std::abs(Imm) < (((1 << 8) * 1) - 1); - case ARMII::AddrMode2: - return std::abs(Imm) < (((1 << 12) * 1) - 1); - case ARMII::AddrModeT2_i12: - return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); + return std::abs(Imm) < ((1 << 8) * 1); + case ARMII::AddrModeT2_i8pos: + return Imm >= 0 && Imm < ((1 << 8) * 1); + case ARMII::AddrModeT2_i8neg: + return Imm < 0 && -Imm < ((1 << 8) * 1); case ARMII::AddrModeT2_i8s4: - return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0; + return std::abs(Imm) < ((1 << 8) * 4) && Imm % 4 == 0; + case ARMII::AddrModeT2_i12: + return Imm >= 0 && Imm < ((1 << 12) * 1); + case ARMII::AddrMode2: + return std::abs(Imm) < ((1 << 12) * 1); default: llvm_unreachable("Unhandled Addressing mode"); } diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp new file mode 100644 index 000000000000..1091c1f970fa --- /dev/null +++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp @@ -0,0 +1,135 @@ +//===-- ARMBranchTargets.cpp -- Harden code using v8.1-M BTI extension -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass inserts BTI instructions at the start of every function and basic +// block which could be indirectly called. The hardware will (when enabled) +// trap when an indirect branch or call instruction targets an instruction +// which is not a valid BTI instruction. This is intended to guard against +// control-flow hijacking attacks. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-branch-targets" +#define ARM_BRANCH_TARGETS_NAME "ARM Branch Targets" + +namespace { +class ARMBranchTargets : public MachineFunctionPass { +public: + static char ID; + ARMBranchTargets() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return ARM_BRANCH_TARGETS_NAME; } + +private: + void addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB); +}; +} // end anonymous namespace + +char ARMBranchTargets::ID = 0; + +INITIALIZE_PASS(ARMBranchTargets, "arm-branch-targets", ARM_BRANCH_TARGETS_NAME, + false, false) + +void ARMBranchTargets::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +FunctionPass *llvm::createARMBranchTargetsPass() { + return new ARMBranchTargets(); +} + +bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getInfo<ARMFunctionInfo>()->branchTargetEnforcement()) + return false; + + LLVM_DEBUG(dbgs() << "********** ARM Branch Targets **********\n" + << "********** Function: " << MF.getName() << '\n'); + const ARMInstrInfo &TII = + *static_cast<const ARMInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + // LLVM does not consider basic blocks which are the targets of jump tables + // to be address-taken (the address can't escape anywhere else), but they are + // used for indirect branches, so need BTI instructions. + SmallPtrSet<const MachineBasicBlock *, 8> JumpTableTargets; + if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo()) + for (const MachineJumpTableEntry &JTE : JTI->getJumpTables()) + for (const MachineBasicBlock *MBB : JTE.MBBs) + JumpTableTargets.insert(MBB); + + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + bool NeedBTI = false; + bool IsFirstBB = &MBB == &MF.front(); + + // Every function can potentially be called indirectly (even if it has + // static linkage, due to linker-generated veneers). + if (IsFirstBB) + NeedBTI = true; + + // If the block itself is address-taken, or is an exception landing pad, it + // could be indirectly branched to. + if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB)) + NeedBTI = true; + + if (NeedBTI) { + addBTI(TII, MBB, IsFirstBB); + MadeChange = true; + } + } + + return MadeChange; +} + +/// Insert a BTI/PACBTI instruction into a given basic block \c MBB. If +/// \c IsFirstBB is true (meaning that this is the first BB in a function) try +/// to find a PAC instruction and replace it with PACBTI. Otherwise just insert +/// a BTI instruction. +/// The point of insertion is in the beginning of the BB, immediately after meta +/// instructions (such labels in exception handling landing pads). 
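/// (Clarifying note, not in the patch: meta instructions such as EH_LABELs
/// emit no machine code, so a BTI placed after them is still the first
/// instruction executed when the block is reached through an indirect branch.)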
+void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, + bool IsFirstBB) { + // Which instruction to insert: BTI or PACBTI + unsigned OpCode = ARM::t2BTI; + + // Skip meta instructions, including EH labels + auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); + + // If this is the first BB in a function, check if it starts with a PAC + // instruction and in that case remove the PAC instruction. + if (IsFirstBB) { + if (MBBI != MBB.instr_end() && MBBI->getOpcode() == ARM::t2PAC) { + LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName() + << "' to replace with PACBTI\n"); + OpCode = ARM::t2PACBTI; + auto NextMBBI = std::next(MBBI); + MBBI->eraseFromParent(); + MBBI = NextMBBI; + } + } + + LLVM_DEBUG(dbgs() << "Inserting a '" + << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI") + << "' instr into BB '" << MBB.getName() << "'\n"); + // Finally, insert a new instruction (either PAC or PACBTI) + BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)); +} diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 121558276c3e..c2ca4708c208 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -184,6 +184,9 @@ namespace { /// base address. DenseMap<int, int> JumpTableUserIndices; + // Maps a MachineBasicBlock to the number of jump tables entries. + DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount; + /// ImmBranch - One per immediate branch, keeping the machine instruction /// pointer, conditional or unconditional, the max displacement, /// and (if isCond is true) the corresponding unconditional branch @@ -274,7 +277,10 @@ namespace { unsigned &DeadSize, bool &CanDeleteLEA, bool &BaseRegKill); bool optimizeThumb2JumpTables(); - MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, + void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB, + MachineBasicBlock &NewBB); + MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI, + MachineBasicBlock *BB, MachineBasicBlock *JTBB); unsigned getUserOffset(CPUser&) const; @@ -518,6 +524,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { CPEntries.clear(); JumpTableEntryIndices.clear(); JumpTableUserIndices.clear(); + BlockJumpTableRefCount.clear(); ImmBranches.clear(); PushPopMIs.clear(); T2JumpTables.clear(); @@ -720,6 +727,14 @@ Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) { return MCP->getConstants()[CPI].getAlign(); } +// Exception landing pads, blocks that has their adress taken, and function +// entry blocks will always be (potential) indirect jump targets, regardless of +// whether they are referenced by or not by jump tables. +static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) { + return MBB.isEHPad() || MBB.hasAddressTaken() || + &MBB == &MBB.getParent()->front(); +} + /// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. 
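// (Clarifying note, not part of the patch: these are the blocks that can be
// reached without going through a jump table at all. The unwinder branches to
// EH pads, address-taken blocks can be targets of computed branches, and an
// entry block can be reached through an indirect call or a linker-generated
// veneer, so a BTI in such a block must survive any jump-table rewriting.)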
@@ -730,6 +745,20 @@ void ARMConstantIslands::scanFunctionJumpTables() { (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr)) T2JumpTables.push_back(&I); } + + if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement()) + return; + + if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo()) + for (const MachineJumpTableEntry &JTE : JTI->getJumpTables()) + for (const MachineBasicBlock *MBB : JTE.MBBs) { + if (isAlwaysIndirectTarget(*MBB)) + // Set the reference count essentially to infinity, it will never + // reach zero and the BTI Instruction will never be removed. + BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max(); + else + ++BlockJumpTableRefCount[MBB]; + } } /// initializeFunctionInfo - Do the initial scan of the function, building up @@ -1219,9 +1248,9 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; // Change the CPI in the instruction operand to refer to the clone. - for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) - if (UserMI->getOperand(j).isCPI()) { - UserMI->getOperand(j).setIndex(CPEs[i].CPI); + for (MachineOperand &MO : UserMI->operands()) + if (MO.isCPI()) { + MO.setIndex(CPEs[i].CPI); break; } // Adjust the refcount of the clone... @@ -1601,9 +1630,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex, BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator()); // Finally, change the CPI in the instruction operand to be ID. - for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) - if (UserMI->getOperand(i).isCPI()) { - UserMI->getOperand(i).setIndex(ID); + for (MachineOperand &MO : UserMI->operands()) + if (MO.isCPI()) { + MO.setIndex(ID); break; } @@ -2211,8 +2240,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4; const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; BBInfoVector &BBInfo = BBUtils->getBBInfo(); - for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { - MachineBasicBlock *MBB = JTBBs[j]; + for (MachineBasicBlock *MBB : JTBBs) { unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; // Negative offset is not ok. FIXME: We should change BB layout to make // sure all the branches are forward. @@ -2405,17 +2433,16 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { // and try to adjust them such that that's true. int JTNumber = MI->getParent()->getNumber(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { - MachineBasicBlock *MBB = JTBBs[j]; + for (MachineBasicBlock *MBB : JTBBs) { int DTNumber = MBB->getNumber(); if (DTNumber < JTNumber) { // The destination precedes the switch. Try to move the block forward // so we have a positive offset. 
MachineBasicBlock *NewBB = - adjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(JTI, MBB, MI->getParent()); if (NewBB) - MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); + MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB); MadeChange = true; } } @@ -2424,8 +2451,40 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { return MadeChange; } -MachineBasicBlock *ARMConstantIslands:: -adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { +void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB, + MachineBasicBlock &NewBB) { + assert(isThumb2 && "BTI in Thumb1?"); + + // Insert a BTI instruction into NewBB + BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI)); + + // Update jump table reference counts. + const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo(); + const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI]; + for (const MachineBasicBlock *MBB : JTE.MBBs) { + if (MBB != &OldBB) + continue; + --BlockJumpTableRefCount[MBB]; + ++BlockJumpTableRefCount[&NewBB]; + } + + // If the old basic block reference count dropped to zero, remove + // the BTI instruction at its beginning. + if (BlockJumpTableRefCount[&OldBB] > 0) + return; + + // Skip meta instructions + auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) { + return MI.isMetaInstruction(); + }); + assert(BTIPos->getOpcode() == ARM::t2BTI && + "BasicBlock is mentioned in a jump table but does start with BTI"); + if (BTIPos->getOpcode() == ARM::t2BTI) + BTIPos->eraseFromParent(); +} + +MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward( + unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. 
This is a very simple @@ -2483,6 +2542,9 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { NewBB->addSuccessor(BB); JTBB->replaceSuccessor(BB, NewBB); + if (MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement()) + fixupBTI(JTI, *BB, *NewBB); + ++NumJTInserted; return NewBB; } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a8f09969e948..7a35f252b22a 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -125,9 +125,8 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI) { const MCInstrDesc &Desc = OldMI.getDesc(); - for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); - i != e; ++i) { - const MachineOperand &MO = OldMI.getOperand(i); + for (const MachineOperand &MO : + llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) { assert(MO.isReg() && MO.getReg()); if (MO.isUse()) UseMI.add(MO); @@ -2252,8 +2251,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .add(predOps(ARMCC::AL)) .addReg(JumpReg, RegState::Kill); - for (int I = 1, E = MI.getNumOperands(); I != E; ++I) - NewCall->addOperand(MI.getOperand(I)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + NewCall->addOperand(MO); if (MI.isCandidateForCallSiteEntry()) MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr()); @@ -2524,17 +2523,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::LDRLIT_ga_pcrel: case ARM::LDRLIT_ga_pcrel_ldr: case ARM::tLDRLIT_ga_abs: + case ARM::t2LDRLIT_ga_pcrel: case ARM::tLDRLIT_ga_pcrel: { Register DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); auto Flags = MO1.getTargetFlags(); const GlobalValue *GV = MO1.getGlobal(); - bool IsARM = - Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; + bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && + Opcode != ARM::tLDRLIT_ga_abs && + Opcode != ARM::t2LDRLIT_ga_pcrel; bool IsPIC = Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; + if (Opcode == ARM::t2LDRLIT_ga_pcrel) + LDRLITOpc = ARM::t2LDRpci; unsigned PICAddOpc = IsARM ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) @@ -3065,7 +3068,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); } MIB.cloneMemRefs(MI); - for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + MIB.add(MO); MI.eraseFromParent(); return true; } @@ -3080,8 +3084,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, Opcode == ARM::LOADDUAL ? RegState::Define : 0) .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), Opcode == ARM::LOADDUAL ? RegState::Define : 0); - for (unsigned i = 1; i < MI.getNumOperands(); i++) - MIB.add(MI.getOperand(i)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + MIB.add(MO); MIB.add(predOps(ARMCC::AL)); MIB.cloneMemRefs(MI); MI.eraseFromParent(); diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 025e43444f9c..b866cf952ff1 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -523,9 +523,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes. 
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); + int FI = I.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -1317,11 +1317,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // Mark the D-register spill slots as properly aligned. Since MFI computes // stack slot layout backwards, this can actually mean that the d-reg stack // slot offsets can be wrong. The offset for d8 will always be correct. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned DNum = CSI[i].getReg() - ARM::D8; + for (const CalleeSavedInfo &I : CSI) { + unsigned DNum = I.getReg() - ARM::D8; if (DNum > NumAlignedDPRCS2Regs - 1) continue; - int FI = CSI[i].getFrameIdx(); + int FI = I.getFrameIdx(); // The even-numbered registers will be 16-byte aligned, the odd-numbered // registers will be 8-byte aligned. MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16)); @@ -1488,9 +1488,9 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, // Find the frame index assigned to d8. int D8SpillFI = 0; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) - if (CSI[i].getReg() == ARM::D8) { - D8SpillFI = CSI[i].getFrameIdx(); + for (const CalleeSavedInfo &I : CSI) + if (I.getReg() == ARM::D8) { + D8SpillFI = I.getFrameIdx(); break; } @@ -1693,7 +1693,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // Default 12 bit limit. break; case ARMII::AddrMode3: - case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i8neg: Limit = std::min(Limit, (1U << 8) - 1); break; case ARMII::AddrMode5FP16: diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2b83a292db76..bb2859c766c2 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3274,7 +3274,8 @@ bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) { return false; unsigned int ScalarBits = Type.getScalarSizeInBits(); - bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT; + bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::FP_TO_UINT_SAT; SDNode *Node = N->getOperand(0).getNode(); // floating-point to fixed-point with one fractional bit gets turned into an @@ -3764,6 +3765,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT_SAT: + case ISD::FP_TO_SINT_SAT: if (tryFP_TO_INT(N, dl)) return; break; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e7e10ce07a44..33d115945614 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1016,6 +1016,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FP_EXTEND); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::SETCC); + } + if (Subtarget->hasMVEFloatOps()) { + setTargetDAGCombine(ISD::FADD); } if (!Subtarget->hasFP64()) { @@ -10587,10 +10591,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, LPadList.reserve(CallSiteNumToLPad.size()); for (unsigned I = 1; I <= MaxCSNum; ++I) { SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I]; - for (SmallVectorImpl<MachineBasicBlock*>::iterator - II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { - LPadList.push_back(*II); - InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); + for 
(MachineBasicBlock *MBB : MBBList) { + LPadList.push_back(MBB); + InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end()); } } @@ -10879,9 +10882,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // Add the jump table entries as successors to the MBB. SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; - for (std::vector<MachineBasicBlock*>::iterator - I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { - MachineBasicBlock *CurMBB = *I; + for (MachineBasicBlock *CurMBB : LPadList) { if (SeenMBBs.insert(CurMBB).second) DispContBB->addSuccessor(CurMBB); } @@ -10943,9 +10944,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, // Mark all former landing pads as non-landing pads. The dispatch is the only // landing pad now. - for (SmallVectorImpl<MachineBasicBlock*>::iterator - I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) - (*I)->setIsEHPad(false); + for (MachineBasicBlock *MBBLPad : MBBLPads) + MBBLPad->setIsEHPad(false); // The instruction is gone now. MI.eraseFromParent(); @@ -11771,8 +11771,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); - for (unsigned i = 0; i < MI.getNumOperands(); ++i) - MIB.add(MI.getOperand(i)); + for (const MachineOperand &MO : MI.operands()) + MIB.add(MO); MI.eraseFromParent(); return BB; } @@ -13083,6 +13083,65 @@ static SDValue PerformVSELECTCombine(SDNode *N, return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS); } +// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n +static SDValue PerformVSetCCToVCTPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + EVT VT = N->getValueType(0); + + if (!Subtarget->hasMVEIntegerOps() || + !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + if (CC == ISD::SETUGE) { + std::swap(Op0, Op1); + CC = ISD::SETULT; + } + + if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 || + Op0.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + // Check first operand is BuildVector of 0,1,2,... 
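  // (Worked example, not part of the patch: with 4 lanes and Op1 = splat(3),
  // the setcc computes {0<3, 1<3, 2<3, 3<3} = {1,1,1,0}. MVE's VCTP32 with an
  // operand of 3 produces exactly that predicate, the first min(n, #lanes)
  // lanes set, which is why the whole pattern can be replaced by the vctp
  // intrinsic chosen below.)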
+ for (unsigned I = 0; I < VT.getVectorNumElements(); I++) { + if (!Op0.getOperand(I).isUndef() && + !(isa<ConstantSDNode>(Op0.getOperand(I)) && + Op0.getConstantOperandVal(I) == I)) + return SDValue(); + } + + // The second is a Splat of Op1S + SDValue Op1S = DCI.DAG.getSplatValue(Op1); + if (!Op1S) + return SDValue(); + + unsigned Opc; + switch (VT.getVectorNumElements()) { + case 2: + Opc = Intrinsic::arm_mve_vctp64; + break; + case 4: + Opc = Intrinsic::arm_mve_vctp32; + break; + case 8: + Opc = Intrinsic::arm_mve_vctp16; + break; + case 16: + Opc = Intrinsic::arm_mve_vctp8; + break; + default: + return SDValue(); + } + + SDLoc DL(N); + return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DCI.DAG.getConstant(Opc, DL, MVT::i32), + DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32)); +} + static SDValue PerformABSCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -13427,6 +13486,26 @@ bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } +bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, + EVT VT) const { + if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) + return false; + + switch (FPVT.getSimpleVT().SimpleTy) { + case MVT::f16: + return Subtarget->hasVFP2Base(); + case MVT::f32: + return Subtarget->hasVFP2Base(); + case MVT::f64: + return Subtarget->hasFP64(); + case MVT::v4f32: + case MVT::v8f16: + return Subtarget->hasMVEFloatOps(); + default: + return false; + } +} + static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST) { @@ -14485,6 +14564,52 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid. +static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { + if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) + return SDValue(); + SDValue CSInc = Cmp->getOperand(0); + if (CSInc.getOpcode() != ARMISD::CSINC || + !isNullConstant(CSInc.getOperand(0)) || + !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse()) + return SDValue(); + CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); + return CSInc.getOperand(3); +} + +static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) { + // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in + // t92: glue = ARMISD::CMPZ t74, 0 + // t93: i32 = ARMISD::CSINC 0, 0, 1, t92 + // t96: glue = ARMISD::CMPZ t93, 0 + // t114: i32 = ARMISD::CSINV 0, 0, 0, t96 + ARMCC::CondCodes Cond; + if (SDValue C = IsCMPZCSINC(N, Cond)) + if (Cond == ARMCC::EQ) + return C; + return SDValue(); +} + +static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) { + // Fold away an unneccessary CMPZ/CSINC + // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) -> + // if C1==EQ -> CSXYZ A, B, C2, D + // if C1==NE -> CSXYZ A, B, NOT(C2), D + ARMCC::CondCodes Cond; + if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) { + if (N->getConstantOperandVal(2) == ARMCC::EQ) + return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), + N->getOperand(1), + DAG.getConstant(Cond, SDLoc(N), MVT::i32), C); + if (N->getConstantOperandVal(2) == ARMCC::NE) + return DAG.getNode( + N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), + N->getOperand(1), + DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C); + } + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. 
static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -16411,6 +16536,42 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, return FixConv; } +static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasMVEFloatOps()) + return SDValue(); + + // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x) + // The second form can be more easily turned into a predicated vadd, and + // possibly combined into a fma to become a predicated vfma. + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // The identity element for a fadd is -0.0, which these VMOV's represent. + auto isNegativeZeroSplat = [&](SDValue Op) { + if (Op.getOpcode() != ISD::BITCAST || + Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM) + return false; + if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664) + return true; + if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688) + return true; + return false; + }; + + if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT) + std::swap(Op0, Op1); + + if (Op1.getOpcode() != ISD::VSELECT || + !isNegativeZeroSplat(Op1.getOperand(2))) + return SDValue(); + SDValue FAdd = + DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags()); + return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0); +} + /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) /// can replace combinations of VCVT (integer to floating-point) and VDIV /// when the VDIV has a constant operand that is a power of 2. @@ -17049,18 +17210,6 @@ static SDValue PerformShiftCombine(SDNode *N, const ARMSubtarget *ST) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); - if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { - // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high - // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 
- SDValue N1 = N->getOperand(1); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { - SDValue N0 = N->getOperand(0); - if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && - DAG.MaskedValueIsZero(N0.getOperand(0), - APInt::getHighBitsSet(32, 16))) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); - } - } if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 && N->getOperand(0)->getOpcode() == ISD::AND && @@ -18173,6 +18322,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT_CC: case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget); case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget); + case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget); case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); @@ -18205,6 +18355,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI.DAG, Subtarget); + case ISD::FADD: + return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget); case ISD::FDIV: return PerformVDIVCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: @@ -18228,6 +18380,12 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return PerformCMOVCombine(N, DCI.DAG); case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); + case ARMISD::CMPZ: + return PerformCMPZCombine(N, DCI.DAG); + case ARMISD::CSINC: + case ARMISD::CSINV: + case ARMISD::CSNEG: + return PerformCSETCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI, Subtarget); case ARMISD::VLD1DUP: diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 0fddd58e178e..e3b422358cae 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -736,6 +736,8 @@ class VectorType; bool preferIncOfAddToSubOfNot(EVT VT) const override; + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; + protected: std::pair<const TargetRegisterClass *, uint8_t> findRepresentativeClass(const TargetRegisterInfo *TRI, diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index de351372abf2..ff5afd787c82 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -103,15 +103,17 @@ def AddrModeT1_4 : AddrMode<9>; def AddrModeT1_s : AddrMode<10>; def AddrModeT2_i12 : AddrMode<11>; def AddrModeT2_i8 : AddrMode<12>; -def AddrModeT2_so : AddrMode<13>; -def AddrModeT2_pc : AddrMode<14>; -def AddrModeT2_i8s4 : AddrMode<15>; -def AddrMode_i12 : AddrMode<16>; -def AddrMode5FP16 : AddrMode<17>; -def AddrModeT2_ldrex : AddrMode<18>; -def AddrModeT2_i7s4 : AddrMode<19>; -def AddrModeT2_i7s2 : AddrMode<20>; -def AddrModeT2_i7 : AddrMode<21>; +def AddrModeT2_i8pos : AddrMode<13>; +def AddrModeT2_i8neg : AddrMode<14>; +def AddrModeT2_so : AddrMode<15>; +def AddrModeT2_pc : AddrMode<16>; +def AddrModeT2_i8s4 : AddrMode<17>; +def AddrMode_i12 : AddrMode<18>; +def AddrMode5FP16 : AddrMode<19>; +def AddrModeT2_ldrex : AddrMode<20>; +def AddrModeT2_i7s4 : AddrMode<21>; +def AddrModeT2_i7s2 : AddrMode<22>; +def AddrModeT2_i7 : AddrMode<23>; // Load / store index mode. 
class IndexMode<bits<2> val> { @@ -1392,9 +1394,12 @@ class T2I<dag oops, dag iops, InstrItinClass itin, class T2Ii12<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>; -class T2Ii8<dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>; +class T2Ii8p<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8pos, 4, itin, opc, asm, "", pattern>; +class T2Ii8n<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : Thumb2I<oops, iops, AddrModeT2_i8neg, 4, itin, opc, asm, "", pattern>; class T2Iso<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 7d0bc756e882..1c1db473f866 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -420,6 +420,12 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; +// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise +def top16Zero: PatLeaf<(i32 GPR:$src), [{ + return !SDValue(N,0)->getValueType(0).isVector() && + CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + }]>; + class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; @@ -4748,6 +4754,8 @@ def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), (REV16 (LDRH addrmode3:$addr))>; def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; +def : ARMV6Pat<(srl (bswap top16Zero:$Rn), (i32 16)), + (REV16 GPR:$Rn)>; let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 697730037277..f53814a80e01 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3621,21 +3621,24 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[ let validForTailPredication = 1; } -multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { +multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op, + Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>; defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEFloat] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? 
), !cast<Instruction>(NAME), IdentityVec>; } } -multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI> - : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated>; +multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> + : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>; + +def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float +def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half -defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>; -defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>; +defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>; +defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>; class MVE_VCMLA<string suffix, bits<2> size> : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd), @@ -3747,27 +3750,30 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>; defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>; multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> { let validForTailPredication = 1; } defvar Inst = !cast<Instruction>(NAME); let Predicates = [HasMVEFloat] in { - defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>; + defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>; } } -multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI> - : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>; -multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI> - : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>; +multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> + : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>; +multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> + : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>; -defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>; -defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>; +def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float +def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half -defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>; -defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>; +defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32, ARMimmMinusZeroF>; +defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16, ARMimmMinusZeroH>; + +defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32, ARMimmAllZerosV>; +defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16, ARMimmAllZerosV>; class MVE_VCADD<string suffix, bits<2> size, string cstr=""> : MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd), @@ -5373,22 +5379,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>; defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>; multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract, - SDNode Op, Intrinsic PredInt> { + SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> { def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>; defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? 
), - !cast<Instruction>(NAME)>; + !cast<Instruction>(NAME), IdentityVec>; } let Predicates = [HasMVEFloat] in { defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd, - int_arm_mve_add_predicated>; + int_arm_mve_add_predicated, ARMimmMinusZeroF>; defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd, - int_arm_mve_add_predicated>; + int_arm_mve_add_predicated, ARMimmMinusZeroH>; defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub, - int_arm_mve_sub_predicated>; + int_arm_mve_sub_predicated, ARMimmAllZerosV>; defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub, - int_arm_mve_sub_predicated>; + int_arm_mve_sub_predicated, ARMimmAllZerosV>; } class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size, @@ -5567,16 +5573,16 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>; defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>; defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>; -multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> { +multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> { let validForTailPredication = 1 in def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>; defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ), - !cast<Instruction>(NAME)>; + !cast<Instruction>(NAME), IdentityVec>; } let Predicates = [HasMVEFloat] in { - defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>; - defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>; + defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16, ARMimmOneH>; + defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32, ARMimmOneF>; } class MVE_VFMAMLA_qr<string iname, string suffix, diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index bf717a4056e9..f09ad8167600 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1576,6 +1576,8 @@ def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), (tREV16 (tLDRHi t_addrmode_is2:$addr))>; def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)), (tREV16 (tLDRHr t_addrmode_rr:$addr))>; +def : T1Pat<(srl (bswap top16Zero:$Rn), (i32 16)), + (tREV16 tGPR:$Rn)>; def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), t_addrmode_is2:$addr), (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 783db9dde17f..4471317f4ea4 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -1191,9 +1191,9 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let DecoderMethod = "DecodeT2LoadImm12"; } - def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, - opc, "\t$Rt, $addr", - [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>, + def i8 : T2Ii8n <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, + opc, "\t$Rt, $addr", + [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>, Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; @@ -1284,9 +1284,9 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, - opc, "\t$Rt, $addr", - [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>, + def i8 : T2Ii8n <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii, + opc, "\t$Rt, $addr", + [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let 
Inst{26-23} = 0b0000; @@ -1580,8 +1580,8 @@ def t2LDR_POST_imm : t2AsmPseudo<"ldr${p}.w $Rt, $Rn, $imm", // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, - "\t$Rt, $addr", []>, Sched<[WriteLd]> { + : T2Ii8p<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc, + "\t$Rt, $addr", []>, Sched<[WriteLd]> { bits<4> Rt; bits<13> addr; let Inst{31-27} = 0b11111; @@ -1747,8 +1747,8 @@ def t2STR_POST_imm : t2AsmPseudo<"str${p}.w $Rt, $Rn, $imm", // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc, - "\t$Rt, $addr", []>, Sched<[WriteST]> { + : T2Ii8p<(outs), (ins rGPR:$Rt, t2addrmode_posimm8:$addr), ii, opc, + "\t$Rt, $addr", []>, Sched<[WriteST]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = 0; // not signed @@ -1851,8 +1851,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadImm12"; } - def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, - "\t$addr", + def i8 : T2Ii8n<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, + "\t$addr", [(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>, Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; @@ -2926,18 +2926,11 @@ let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; -// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise -def top16Zero: PatLeaf<(i32 rGPR:$src), [{ - return !SDValue(N,0)->getValueType(0).isVector() && - CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); - }]>; - // so_imm_notSext is needed instead of so_imm_not, as the value of imm // will match the extended, not the original bitWidth for $src. def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm), (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>; - // FIXME: Disable this pattern on Darwin to workaround an assembler bug. 
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, @@ -3283,6 +3276,9 @@ def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>, Sched<[WriteALU]>; +def : T2Pat<(srl (bswap top16Zero:$Rn), (i32 16)), + (t2REV16 rGPR:$Rn)>; + def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>, @@ -4059,6 +4055,8 @@ def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", bits<8> imm; let Inst{31-3} = 0b11110011101011111000000000000; let Inst{7-0} = imm; + + let DecoderMethod = "DecodeT2HintSpaceInstruction"; } def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p), 0>; @@ -4079,6 +4077,11 @@ def : t2InstAlias<"esb$p", (t2HINT 16, pred:$p), 0> { def : t2InstAlias<"csdb$p.w", (t2HINT 20, pred:$p), 0>; def : t2InstAlias<"csdb$p", (t2HINT 20, pred:$p), 1>; +def : t2InstAlias<"pacbti$p r12,lr,sp", (t2HINT 13, pred:$p), 1>; +def : t2InstAlias<"bti$p", (t2HINT 15, pred:$p), 1>; +def : t2InstAlias<"pac$p r12,lr,sp", (t2HINT 29, pred:$p), 1>; +def : t2InstAlias<"aut$p r12,lr,sp", (t2HINT 45, pred:$p), 1>; + def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", [(int_arm_dbg imm0_15:$opt)]> { bits<4> opt; @@ -4254,6 +4257,19 @@ def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, def : T2Pat<(ARMWrapperJT tjumptable:$dst), (t2LEApcrelJT tjumptable:$dst)>; +let hasNoSchedulingInfo = 1 in { +def t2LDRLIT_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set rGPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>; +} + +// TLS globals +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (t2LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>; + // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. 
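For illustration, the source-level shape that the new srl(bswap x, 16) -> REV16 patterns (and the top16Zero PatLeaf they rely on) target looks roughly like the sketch below. This is only a sketch: the function name is hypothetical and nothing beyond the __builtin_bswap32 builtin is assumed. When the operand's upper 16 bits are provably zero, here because it is zero-extended from 16 bits, bswap followed by a 16-bit shift is exactly the halfword byte reversal that a single rev16 performs, so the former rev + lsr #16 sequence can be folded.

#include <cstdint>

// Sketch only: the zero-extended 16-bit input guarantees the top 16 bits are
// zero, which is the condition the top16Zero PatLeaf checks.
uint32_t swap_halfword_bytes(uint16_t v) {
  // bswap32(0x0000ABCD) >> 16 == 0x0000CDAB, i.e. rev16 of the low halfword.
  return __builtin_bswap32(static_cast<uint32_t>(v)) >> 16;
}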
@@ -5607,6 +5623,15 @@ let Predicates = [HasV8_1MMainline] in { defm : CSPats<ARMcsinv, t2CSINV>; defm : CSPats<ARMcsneg, t2CSNEG>; + def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm), + (t2CSINC ZR, ZR, imm0_31:$imm)>; + def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm), + (t2CSINV ZR, ZR, imm0_31:$imm)>; + def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm), + (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>; + def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm), + (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>; + multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> { def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm), (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; @@ -5636,3 +5661,78 @@ let Predicates = [HasV8_1MMainline] in { def : InstAlias<"cneg\t$Rd, $Rn, $fcond", (t2CSNEG rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>; } + + +// PACBTI +let Predicates = [IsThumb2, HasV8_1MMainline, HasPACBTI] in { +def t2PACG : V8_1MI<(outs rGPR:$Rd), + (ins pred:$p, GPRnopc:$Rn, GPRnopc:$Rm), + AddrModeNone, NoItinerary, "pacg${p}", "$Rd, $Rn, $Rm", "", []> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{31-20} = 0b111110110110; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rd; + let Inst{7-4} = 0b0000; + let Inst{3-0} = Rm; +} + +let hasSideEffects = 1 in { +class PACBTIAut<dag iops, string asm, bit b> + : V8_1MI<(outs), iops, + AddrModeNone, NoItinerary, asm, "$Ra, $Rn, $Rm", "", []> { + bits<4> Ra; + bits<4> Rn; + bits<4> Rm; + let Inst{31-20} = 0b111110110101; + let Inst{19-16} = Rn; + let Inst{15-12} = Ra; + let Inst{11-5} = 0b1111000; + let Inst{4} = b; + let Inst{3-0} = Rm; +} +} + +def t2AUTG : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, GPRnopc:$Rn, GPRnopc:$Rm), + "autg${p}", 0>; + +let isBranch = 1, isTerminator = 1, isIndirectBranch = 1 in { + def t2BXAUT : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, rGPR:$Rn, GPRnopc:$Rm), + "bxaut${p}", 1>; +} +} + + +class PACBTIHintSpaceInst<string asm, string ops, bits<8> imm> + : V8_1MI<(outs), (ins), AddrModeNone, NoItinerary, asm, ops, "", []> { + let Inst{31-8} = 0b111100111010111110000000; + let Inst{7-0} = imm; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{13-11} = 0b101; + + let DecoderMethod = "DecodeT2HintSpaceInstruction"; +} + +class PACBTIHintSpaceNoOpsInst<string asm, bits<8> imm> + : PACBTIHintSpaceInst<asm, "", imm>; + +class PACBTIHintSpaceDefInst<string asm, bits<8> imm> + : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> { + let Defs = [R12]; + let Uses = [LR, SP]; +} + +class PACBTIHintSpaceUseInst<string asm, bits<8> imm> + : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> { + let Uses = [R12, LR, SP]; +} + +def t2PAC : PACBTIHintSpaceDefInst<"pac", 0b00011101>; +def t2PACBTI : PACBTIHintSpaceDefInst<"pacbti", 0b00001101>; +def t2BTI : PACBTIHintSpaceNoOpsInst<"bti", 0b00001111>; +def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> { + let hasSideEffects = 1; +} diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 6e259b1baf97..3b10c60a0654 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1298,8 +1298,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
- for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).getReg() == Base) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MO.getReg() == Base) return false; int Bytes = getLSMultipleTransferSize(MI); @@ -1326,8 +1326,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { return false; bool HighRegsUsed = false; - for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).getReg() >= ARM::R8) { + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MO.getReg() >= ARM::R8) { HighRegsUsed = true; break; } @@ -1350,8 +1350,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { .addImm(Pred).addReg(PredReg); // Transfer the rest of operands. - for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.add(MI->getOperand(OpNum)); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) + MIB.add(MO); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); @@ -2119,9 +2119,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb1 = AFI->isThumbFunction() && !isThumb2; bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock &MBB : Fn) { Modified |= LoadStoreMultipleOpti(MBB); if (STI->hasV5TOps()) Modified |= MergeReturnIntoLDM(MBB); @@ -2710,13 +2708,13 @@ static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, if (isLegalAddressImm(Opcode, Imm, TII)) return true; - // We can convert AddrModeT2_i12 to AddrModeT2_i8. + // We can convert AddrModeT2_i12 to AddrModeT2_i8neg. const MCInstrDesc &Desc = TII->get(Opcode); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); switch (AddrMode) { case ARMII::AddrModeT2_i12: CodesizeEstimate += 1; - return std::abs(Imm) < (((1 << 8) * 1) - 1); + return Imm < 0 && -Imm < ((1 << 8) * 1); } return false; } diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp index 507c3e69b3a4..308d5e7889f2 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -13,8 +13,63 @@ using namespace llvm; void ARMFunctionInfo::anchor() {} +static bool GetBranchTargetEnforcement(MachineFunction &MF) { + const auto &Subtarget = MF.getSubtarget<ARMSubtarget>(); + if (!Subtarget.isMClass() || !Subtarget.hasV7Ops()) + return false; + + const Function &F = MF.getFunction(); + if (!F.hasFnAttribute("branch-target-enforcement")) { + if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( + F.getParent()->getModuleFlag("branch-target-enforcement"))) + return BTE->getZExtValue(); + return false; + } + + const StringRef BTIEnable = + F.getFnAttribute("branch-target-enforcement").getValueAsString(); + assert(BTIEnable.equals_insensitive("true") || + BTIEnable.equals_insensitive("false")); + return BTIEnable.equals_insensitive("true"); +} + +// The pair returns values for the ARMFunctionInfo members +// SignReturnAddress and SignReturnAddressAll respectively. 
+static std::pair<bool, bool> GetSignReturnAddress(const Function &F) { + if (!F.hasFnAttribute("sign-return-address")) { + const Module &M = *F.getParent(); + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("sign-return-address"))) { + if (Sign->getZExtValue()) { + if (const auto *All = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("sign-return-address-all"))) + return {true, All->getZExtValue()}; + return {true, false}; + } + } + return {false, false}; + } + + StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString(); + if (Scope.equals("none")) + return {false, false}; + + if (Scope.equals("all")) + return {true, true}; + + assert(Scope.equals("non-leaf")); + return {true, false}; +} + ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()), IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")), - IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {} + IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")), + BranchTargetEnforcement(GetBranchTargetEnforcement(MF)) { + + const auto &Subtarget = MF.getSubtarget<ARMSubtarget>(); + if (Subtarget.isMClass() && Subtarget.hasV7Ops()) + std::tie(SignReturnAddress, SignReturnAddressAll) = + GetSignReturnAddress(MF.getFunction()); +} diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 851655284060..4077fc058217 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -142,6 +142,17 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// con/destructors). bool PreservesR0 = false; + /// True if the function should sign its return address. + bool SignReturnAddress = false; + + /// True if the fucntion should sign its return address, even if LR is not + /// saved. + bool SignReturnAddressAll = false; + + /// True if BTI instructions should be placed at potential indirect jump + /// destinations. 
+ bool BranchTargetEnforcement = false; + public: ARMFunctionInfo() = default; @@ -268,6 +279,20 @@ public: void setPreservesR0() { PreservesR0 = true; } bool getPreservesR0() const { return PreservesR0; } + + bool shouldSignReturnAddress() const { + return shouldSignReturnAddress(LRSpilled); + } + + bool shouldSignReturnAddress(bool SpillsLR) const { + if (!SignReturnAddress) + return false; + if (SignReturnAddressAll) + return true; + return LRSpilled; + } + + bool branchTargetEnforcement() const { return BranchTargetEnforcement; } }; } // end namespace llvm diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td index 2dc097566d14..c0dc6a363471 100644 --- a/llvm/lib/Target/ARM/ARMPredicates.td +++ b/llvm/lib/Target/ARM/ARMPredicates.td @@ -107,6 +107,8 @@ def HasRAS : Predicate<"Subtarget->hasRAS()">, AssemblerPredicate<(all_of FeatureRAS), "ras">; def HasLOB : Predicate<"Subtarget->hasLOB()">, AssemblerPredicate<(all_of FeatureLOB), "lob">; +def HasPACBTI : Predicate<"Subtarget->hasPACBTI()">, + AssemblerPredicate<(all_of FeaturePACBTI), "pacbti">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<(all_of FeatureFP16),"half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index 9752b3166b45..760a5a5a20cf 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -277,6 +277,16 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV) let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv"; } +// GPRs without the SP register. Used for BXAUT and AUTG +def GPRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, PC)> { + let AltOrders = [(add LR, GPRnosp), (trunc GPRnosp, 8), + (add (trunc GPRnosp, 8), R12, LR, (shl GPRnosp, 8))]; + let AltOrderSelect = [{ + return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF); + }]; + let DiagnosticString = "operand must be a register in range [r0, r12] or LR or PC"; +} + // GPRs without the PC and SP registers but with APSR. Used by CLRM instruction. def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> { let isAllocatable = 0; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 5e1217b6a468..d51a888c951f 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -373,6 +373,8 @@ protected: /// HasLOB - if true, the processor supports the Low Overhead Branch extension bool HasLOB = false; + bool HasPACBTI = false; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. 
bool HasZeroCycleZeroing = false; @@ -671,6 +673,7 @@ public: bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasLOB() const { return HasLOB; } + bool hasPACBTI() const { return HasPACBTI; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { diff --git a/llvm/lib/Target/ARM/ARMSystemRegister.td b/llvm/lib/Target/ARM/ARMSystemRegister.td index f21c7f0246f9..c03db15d1041 100644 --- a/llvm/lib/Target/ARM/ARMSystemRegister.td +++ b/llvm/lib/Target/ARM/ARMSystemRegister.td @@ -106,6 +106,24 @@ def : MClassSysReg<0, 0, 1, 0x894, "control_ns">; def : MClassSysReg<0, 0, 1, 0x898, "sp_ns">; } +let Requires = [{ {ARM::FeaturePACBTI} }] in { +def : MClassSysReg<0, 0, 1, 0x820, "pac_key_p_0">; +def : MClassSysReg<0, 0, 1, 0x821, "pac_key_p_1">; +def : MClassSysReg<0, 0, 1, 0x822, "pac_key_p_2">; +def : MClassSysReg<0, 0, 1, 0x823, "pac_key_p_3">; +def : MClassSysReg<0, 0, 1, 0x824, "pac_key_u_0">; +def : MClassSysReg<0, 0, 1, 0x825, "pac_key_u_1">; +def : MClassSysReg<0, 0, 1, 0x826, "pac_key_u_2">; +def : MClassSysReg<0, 0, 1, 0x827, "pac_key_u_3">; +def : MClassSysReg<0, 0, 1, 0x8a0, "pac_key_p_0_ns">; +def : MClassSysReg<0, 0, 1, 0x8a1, "pac_key_p_1_ns">; +def : MClassSysReg<0, 0, 1, 0x8a2, "pac_key_p_2_ns">; +def : MClassSysReg<0, 0, 1, 0x8a3, "pac_key_p_3_ns">; +def : MClassSysReg<0, 0, 1, 0x8a4, "pac_key_u_0_ns">; +def : MClassSysReg<0, 0, 1, 0x8a5, "pac_key_u_1_ns">; +def : MClassSysReg<0, 0, 1, 0x8a6, "pac_key_u_2_ns">; +def : MClassSysReg<0, 0, 1, 0x8a7, "pac_key_u_3_ns">; +} // Banked Registers // diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 833c7effd31c..0b314ac2a41e 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMParallelDSPPass(Registry); + initializeARMBranchTargetsPass(Registry); initializeARMConstantIslandsPass(Registry); initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); @@ -571,6 +572,7 @@ void ARMPassConfig::addPreEmitPass() { } void ARMPassConfig::addPreEmitPass2() { + addPass(createARMBranchTargetsPass()); addPass(createARMConstantIslandPass()); addPass(createARMLowOverheadLoopsPass()); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 88de84a4fd78..602c6745d310 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -334,8 +334,9 @@ InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, } // Checks whether Inst is part of a min(max()) or max(min()) pattern -// that will match to an SSAT instruction -static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { +// that will match to an SSAT instruction. Returns the instruction being +// saturated, or null if no saturation pattern was found. 
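As a concrete illustration of the saturation shape this helper and the new isFPSatMinMaxPattern below look for, a clamped float-to-int conversion of roughly the following form now has its -2147483648 immediate costed as free on targets with a VFP2 base, since the whole sequence can lower to a saturating conversion. This is a sketch, not code from the patch; the function name and the exact clamping style are illustrative.

#include <algorithm>
#include <cstdint>

// Sketch: max(min(fptosi ...)) clamping to the int32 range.
int32_t to_int32_saturating(float f) {
  int64_t v = static_cast<int64_t>(f);      // fptosi to a wider integer
  v = std::min<int64_t>(v, INT32_MAX);      // min(..., 2147483647)
  v = std::max<int64_t>(v, INT32_MIN);      // max(..., -2147483648)
  return static_cast<int32_t>(v);
}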
+static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { Value *LHS, *RHS; ConstantInt *C; SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor; @@ -358,12 +359,27 @@ static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { return false; }; - if (isSSatMin(Inst->getOperand(1)) || - (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) || - isSSatMin(*(++Inst->user_begin()))))) - return true; + if (isSSatMin(Inst->getOperand(1))) + return cast<Instruction>(Inst->getOperand(1))->getOperand(1); + if (Inst->hasNUses(2) && + (isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin())))) + return Inst->getOperand(1); } - return false; + return nullptr; +} + +// Look for a FP Saturation pattern, where the instruction can be simplified to +// a fptosi.sat. max(min(fptosi)). The constant in this case is always free. +static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) { + if (Imm.getBitWidth() != 64 || + Imm != APInt::getHighBitsSet(64, 33)) // -2147483648 + return false; + Value *FP = isSSATMinMaxPattern(Inst, Imm); + if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse()) + FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm); + if (!FP) + return false; + return isa<FPToSIInst>(FP); } InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, @@ -423,6 +439,9 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, return 0; } + if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm)) + return 0; + // We can convert <= -1 to < 0, which is generally quite cheap. if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) { ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate(); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 64d2e1bfa9b2..39f407ba7149 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6429,15 +6429,17 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" || - Mnemonic == "vdot" || Mnemonic == "vmmla" || + Mnemonic == "vdot" || Mnemonic == "vmmla" || Mnemonic == "vudot" || Mnemonic == "vsdot" || Mnemonic == "vcmla" || Mnemonic == "vcadd" || Mnemonic == "vfmal" || Mnemonic == "vfmsl" || - Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" || - Mnemonic == "csel" || Mnemonic == "csinc" || + Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" || + Mnemonic == "csel" || Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" || - Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" || - Mnemonic == "csetm") + Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" || + Mnemonic == "csetm" || + Mnemonic == "aut" || Mnemonic == "pac" || Mnemonic == "pacbti" || + Mnemonic == "bti") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -6581,9 +6583,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" || Mnemonic == "csetm" || - Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") || (hasCDE() && MS.isCDEInstr(Mnemonic) && !MS.isITPredicableCDEInstr(Mnemonic)) || + Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") || + Mnemonic == "pac" || Mnemonic == "pacbti" || Mnemonic == "aut" || + Mnemonic == "bti" || (hasMVE() && (Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") || Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") || @@ -12272,6 +12276,7 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) { {ARM::FeatureFPARMv8, ARM::FeatureFullFP16}}, {ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS}}, {ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB}}, + {ARM::AEK_PACBTI, {Feature_HasV8_1MMainlineBit}, {ARM::FeaturePACBTI}}, // FIXME: Unsupported extensions. {ARM::AEK_OS, {}, {}}, {ARM::AEK_IWMMXT, {}, {}}, diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 9caef9f09ea9..c3df7dc88d79 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -185,8 +185,11 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, - unsigned RegNo, uint64_t Address, +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -287,6 +290,9 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, @@ -1172,6 +1178,19 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 13) + S = MCDisassembler::SoftFail; + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + + return S; +} + static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { @@ -2441,6 +2460,31 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned imm = fieldFromInstruction(Insn, 0, 8); + + unsigned Opcode = ARM::t2HINT; + + if (imm == 0x0D) { + Opcode = ARM::t2PACBTI; + } else if 
(imm == 0x1D) { + Opcode = ARM::t2PAC; + } else if (imm == 0x2D) { + Opcode = ARM::t2AUT; + } else if (imm == 0x0F) { + Opcode = ARM::t2BTI; + } + + Inst.setOpcode(Opcode); + if (Opcode == ARM::t2HINT) { + Inst.addOperand(MCOperand::createImm(imm)); + } + + return MCDisassembler::Success; +} + static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4726,6 +4770,25 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, if (!(FeatureBits[ARM::Feature8MSecExt])) return MCDisassembler::Fail; break; + case 0x20: // pac_key_p_0 + case 0x21: // pac_key_p_1 + case 0x22: // pac_key_p_2 + case 0x23: // pac_key_p_3 + case 0x24: // pac_key_u_0 + case 0x25: // pac_key_u_1 + case 0x26: // pac_key_u_2 + case 0x27: // pac_key_u_3 + case 0xa0: // pac_key_p_0_ns + case 0xa1: // pac_key_p_1_ns + case 0xa2: // pac_key_p_2_ns + case 0xa3: // pac_key_p_3_ns + case 0xa4: // pac_key_u_0_ns + case 0xa5: // pac_key_u_1_ns + case 0xa6: // pac_key_u_2_ns + case 0xa7: // pac_key_u_3_ns + if (!(FeatureBits[ARM::FeaturePACBTI])) + return MCDisassembler::Fail; + break; default: // Architecturally defined as unpredictable S = MCDisassembler::SoftFail; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 43f7575df6db..f8de0320166a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -195,16 +195,18 @@ namespace ARMII { AddrModeT1_4 = 9, AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data AddrModeT2_i12 = 11, - AddrModeT2_i8 = 12, - AddrModeT2_so = 13, - AddrModeT2_pc = 14, // +/- i12 for pc relative data - AddrModeT2_i8s4 = 15, // i8 * 4 - AddrMode_i12 = 16, - AddrMode5FP16 = 17, // i8 * 2 - AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst - AddrModeT2_i7s4 = 19, // i7 * 4 - AddrModeT2_i7s2 = 20, // i7 * 2 - AddrModeT2_i7 = 21, // i7 * 1 + AddrModeT2_i8 = 12, // +/- i8 + AddrModeT2_i8pos = 13, // + i8 + AddrModeT2_i8neg = 14, // - i8 + AddrModeT2_so = 15, + AddrModeT2_pc = 16, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 17, // i8 * 4 + AddrMode_i12 = 18, + AddrMode5FP16 = 19, // i8 * 2 + AddrModeT2_ldrex = 20, // i8 * 4, with unscaled offset in MCInst + AddrModeT2_i7s4 = 21, // i7 * 4 + AddrModeT2_i7s2 = 22, // i7 * 2 + AddrModeT2_i7 = 23, // i7 * 1 }; inline static const char *AddrModeToString(AddrMode addrmode) { @@ -223,6 +225,8 @@ namespace ARMII { case AddrModeT1_s: return "AddrModeT1_s"; case AddrModeT2_i12: return "AddrModeT2_i12"; case AddrModeT2_i8: return "AddrModeT2_i8"; + case AddrModeT2_i8pos: return "AddrModeT2_i8pos"; + case AddrModeT2_i8neg: return "AddrModeT2_i8neg"; case AddrModeT2_so: return "AddrModeT2_so"; case AddrModeT2_pc: return "AddrModeT2_pc"; case AddrModeT2_i8s4: return "AddrModeT2_i8s4"; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 3e4c97630af6..02a2d01176fc 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -299,4 +299,9 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { else if (STI.hasFeature(ARM::FeatureVirtualization)) emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowVirtualization); + + if (STI.hasFeature(ARM::FeaturePACBTI)) { + emitAttribute(ARMBuildAttrs::PAC_extension, ARMBuildAttrs::AllowPAC); + 
emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI); + } } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index e4e95f63f0a6..224c61b9f065 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -205,9 +205,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, return; } - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); + int FI = I.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -266,10 +266,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); + int FI = I.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index bdb167a08e61..ebd139af2219 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -261,7 +261,7 @@ void Thumb2InstrInfo::expandLoadStackGuard( cast<GlobalValue>((*MI->memoperands_begin())->getValue()); if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV)) - expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12); + expandLoadStackGuardBase(MI, ARM::t2LDRLIT_ga_pcrel, ARM::t2LDRi12); else if (MF.getTarget().isPositionIndependent()) expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12); else @@ -634,7 +634,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned NumBits = 0; unsigned Scale = 1; - if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) { + if (AddrMode == ARMII::AddrModeT2_i8neg || + AddrMode == ARMII::AddrModeT2_i12) { // i8 supports only negative, and i12 supports only positive, so // based on Offset sign convert Opcode to the appropriate // instruction diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 132516694f4e..1164b6ebbac3 100644 --- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -502,8 +502,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // For the non-writeback version (this one), the base register must be // one of the registers being loaded. bool isOK = false; - for (unsigned i = 3; i < MI->getNumOperands(); ++i) { - if (MI->getOperand(i).getReg() == BaseReg) { + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) { + if (MO.getReg() == BaseReg) { isOK = true; break; } @@ -527,8 +527,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // numbered register (i.e. it's in operand 4 onwards) then with writeback // the stored value is unknown, so we can't convert to tSTMIA_UPD. Register BaseReg = MI->getOperand(0).getReg(); - for (unsigned i = 4; i < MI->getNumOperands(); ++i) - if (MI->getOperand(i).getReg() == BaseReg) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4)) + if (MO.getReg() == BaseReg) return false; break; @@ -611,8 +611,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, } // Transfer the rest of operands. 
- for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) - MIB.add(MI->getOperand(OpNum)); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum)) + MIB.add(MO); // Transfer memoperands. MIB.setMemRefs(MI->memoperands()); diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp index 846798a63cb7..2ce9c386f24c 100644 --- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp +++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp @@ -47,9 +47,7 @@ MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO, void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; switch (MO.getType()) { default: diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp index 8bced3cec082..685bafd785df 100644 --- a/llvm/lib/Target/Hexagon/BitTracker.cpp +++ b/llvm/lib/Target/Hexagon/BitTracker.cpp @@ -214,9 +214,9 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, Register SelfR) { BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { uint16_t B = M.first(), E = M.last(), W = width(); - // Sanity: M must be a valid mask for *this. + // M must be a valid mask for *this. assert(B < W && E < W); - // Sanity: the masked part of *this must have the same number of bits + // The masked part of *this must have the same number of bits // as the source. assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|. assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|. @@ -850,8 +850,7 @@ void BT::visitNonBranch(const MachineInstr &MI) { bool Eval = ME.evaluate(MI, Map, ResMap); if (Trace && Eval) { - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; RegisterRef RU(MO); diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp index 0f6dedeb28c3..1938a5c259da 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -189,7 +189,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, unsigned NumDefs = 0; - // Sanity verification: there should not be any defs with subregisters. + // Basic correctness check: there should not be any defs with subregisters. for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp index 43f0758f6598..8c3b9572201e 100644 --- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -476,10 +476,10 @@ namespace { } // end anonymous namespace static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { - for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) - if (I->count(N)) - return &*I; - return nullptr; + for (const NodeSet &S : Rel) + if (S.count(N)) + return &S; + return nullptr; } // Create an ordered pair of GepNode pointers. 
The pair will be used in @@ -589,9 +589,8 @@ void HexagonCommonGEP::common() { dbgs() << "{ " << I->first << ", " << I->second << " }\n"; dbgs() << "Gep equivalence classes:\n"; - for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + for (const NodeSet &S : EqRel) { dbgs() << '{'; - const NodeSet &S = *I; for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) { if (J != S.begin()) dbgs() << ','; @@ -604,8 +603,7 @@ void HexagonCommonGEP::common() { // Create a projection from a NodeSet to the minimal element in it. using ProjMap = std::map<const NodeSet *, GepNode *>; ProjMap PM; - for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { - const NodeSet &S = *I; + for (const NodeSet &S : EqRel) { GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder); std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min)); (void)Ins; @@ -1280,8 +1278,8 @@ bool HexagonCommonGEP::runOnFunction(Function &F) { return false; // For now bail out on C++ exception handling. - for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) - for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) if (isa<InvokeInst>(I) || isa<LandingPadInst>(I)) return false; diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp index a774baaa48e6..d3fcdb6ae9a8 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1254,7 +1254,7 @@ void HCE::collect(MachineFunction &MF) { void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, AssignmentMap &IMap) { - // Sanity check: make sure that all extenders in the range [Begin..End) + // Basic correctness: make sure that all extenders in the range [Begin..End) // share the same root ER. for (unsigned I = Begin; I != End; ++I) assert(ER == ExtRoot(Extenders[I].getOp())); diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 23d0cc829e52..03b0f75b2dc1 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -237,12 +237,9 @@ static bool isEvenReg(unsigned Reg) { } static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) { - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &Op = MI.getOperand(I); - if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill()) - continue; - Op.setIsKill(false); - } + for (MachineOperand &Op : MI.operands()) + if (Op.isReg() && Op.getReg() == RegNotKilled && Op.isKill()) + Op.setIsKill(false); } /// Returns true if it is unsafe to move a copy instruction from \p UseReg to @@ -403,10 +400,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { // Mark TFRs that feed a potential new value store as such. if (TII->mayBeNewStore(MI)) { // Look for uses of TFR instructions. - for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE; - ++OpdIdx) { - MachineOperand &Op = MI.getOperand(OpdIdx); - + for (const MachineOperand &Op : MI.operands()) { // Skip over anything except register uses. if (!Op.isReg() || !Op.isUse() || !Op.getReg()) continue; @@ -484,14 +478,13 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { IsConst64Disabled = true; // Traverse basic blocks. 
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; - ++BI) { + for (MachineBasicBlock &MBB : MF) { PotentiallyNewifiableTFR.clear(); - findPotentialNewifiableTFRs(*BI); + findPotentialNewifiableTFRs(MBB); // Traverse instructions in basic block. - for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end(); - MI != End;) { + for (MachineBasicBlock::iterator MI = MBB.begin(), End = MBB.end(); + MI != End;) { MachineInstr &I1 = *MI++; if (I1.isDebugInstr()) diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index bff596e69efd..12ceac545e9d 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1404,18 +1404,18 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, // Add callee-saved registers as use. addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true); // Add live in registers. - for (unsigned I = 0; I < CSI.size(); ++I) - MBB.addLiveIn(CSI[I].getReg()); + for (const CalleeSavedInfo &I : CSI) + MBB.addLiveIn(I.getReg()); return true; } - for (unsigned i = 0, n = CSI.size(); i < n; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); // Add live in registers. We treat eh_return callee saved register r0 - r3 // specially. They are not really callee saved registers as they are not // supposed to be killed. bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); - int FI = CSI[i].getFrameIdx(); + int FI = I.getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); if (IsKill) @@ -1478,10 +1478,10 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, return true; } - for (unsigned i = 0; i < CSI.size(); ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - int FI = CSI[i].getFrameIdx(); + int FI = I.getFrameIdx(); HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } @@ -1619,8 +1619,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, // (1) For each callee-saved register, add that register and all of its // sub-registers to SRegs. LLVM_DEBUG(dbgs() << "Initial CS registers: {"); - for (unsigned i = 0, n = CSI.size(); i < n; ++i) { - unsigned R = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned R = I.getReg(); LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI)); for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) SRegs[*SR] = true; @@ -1720,10 +1720,10 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, LLVM_DEBUG({ dbgs() << "CS information: {"; - for (unsigned i = 0, n = CSI.size(); i < n; ++i) { - int FI = CSI[i].getFrameIdx(); + for (const CalleeSavedInfo &I : CSI) { + int FI = I.getFrameIdx(); int Off = MFI.getObjectOffset(FI); - dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; + dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp"; if (Off >= 0) dbgs() << '+'; dbgs() << Off; @@ -2634,8 +2634,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, // Check if CSI only has double registers, and if the registers form // a contiguous block starting from D8. 
BitVector Regs(Hexagon::NUM_TARGET_REGS); - for (unsigned i = 0, n = CSI.size(); i < n; ++i) { - unsigned R = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned R = I.getReg(); if (!Hexagon::DoubleRegsRegClass.contains(R)) return true; Regs[R] = true; diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp index 02da2f29591a..46c1fbc6eeb2 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -597,19 +597,12 @@ void HexagonGenInsert::dump_map() const { void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { unsigned Index = 0; - using mf_iterator = MachineFunction::const_iterator; - - for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { - const MachineBasicBlock &B = *A; + for (const MachineBasicBlock &B : *MFN) { if (!CMS->BT.reached(&B)) continue; - using mb_iterator = MachineBasicBlock::const_iterator; - - for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { - const MachineInstr *MI = &*I; - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineInstr &MI : B) { + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef()) { Register R = MO.getReg(); assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); @@ -725,8 +718,7 @@ bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const { - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register R = MO.getReg(); @@ -738,8 +730,7 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, void HexagonGenInsert::getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const { - for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; Register R = MO.getReg(); @@ -942,12 +933,11 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, // can remove them from the list of available registers once all DT // successors have been processed. RegisterSet BlockDefs, InsDefs; - for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { - MachineInstr *MI = &*I; + for (MachineInstr &MI : *B) { InsDefs.clear(); - getInstrDefs(MI, InsDefs); + getInstrDefs(&MI, InsDefs); // Leave those alone. They are more transparent than "insert". - bool Skip = MI->isCopy() || MI->isRegSequence(); + bool Skip = MI.isCopy() || MI.isRegSequence(); if (!Skip) { // Visit all defined registers, and attempt to find the corresponding @@ -1458,8 +1448,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) Instrs.push_back(&*I); - for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { - MachineInstr *MI = *I; + for (MachineInstr *MI : Instrs) { unsigned Opc = MI->getOpcode(); // Do not touch lifetime markers. This is why the target-independent DCE // cannot be used. @@ -1501,7 +1490,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; bool Changed = false; - // Sanity check: one, but not both. + // Verify: one, but not both. 
assert(!OptSelectAll0 || !OptSelectHas0); IFMap.clear(); diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index cf4f13fb8c0d..55de02816fb8 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -328,7 +328,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF); if (!MxOpc) continue; - // Basic sanity check: since we are deleting instructions, validate the + // Basic correctness check: since we are deleting instructions, validate the // iterators. There is a possibility that one of Def1 or Def2 is translated // to "mux" and being considered for other "mux" instructions. if (!MX.At->getParent() || !MX.Def1->getParent() || !MX.Def2->getParent()) diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp index d8d2025c5d27..1a66394e9757 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -205,16 +205,14 @@ bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { } void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { - for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { - MachineBasicBlock &B = *A; - for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { - MachineInstr *MI = &*I; - unsigned Opc = MI->getOpcode(); + for (MachineBasicBlock &B : MF) { + for (MachineInstr &MI : B) { + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::C2_tfrpr: case TargetOpcode::COPY: - if (isPredReg(MI->getOperand(1).getReg())) { - RegisterSubReg RD = MI->getOperand(0); + if (isPredReg(MI.getOperand(1).getReg())) { + RegisterSubReg RD = MI.getOperand(0); if (RD.R.isVirtual()) PredGPRs.insert(RD); } @@ -411,7 +409,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { NumOps = 2; } - // Some sanity: check that def is in operand #0. + // Check that def is in operand #0. 
MachineOperand &Op0 = MI->getOperand(0); assert(Op0.isDef()); RegisterSubReg OutR(Op0); @@ -488,8 +486,8 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { } } - for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I) - (*I)->eraseFromParent(); + for (MachineInstr *MI : Erase) + MI->eraseFromParent(); return Changed; } @@ -515,11 +513,8 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { Again = false; VectOfInst Processed, Copy; - using iterator = VectOfInst::iterator; - Copy = PUsers; - for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) { - MachineInstr *MI = *I; + for (MachineInstr *MI : Copy) { bool Done = convertToPredForm(MI); if (Done) { Processed.insert(MI); diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index a4971ad712eb..5d2e1b259449 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1014,12 +1014,10 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(**L->block_begin())); for (MachineBasicBlock *MBB : L->getBlocks()) { - for (MachineBasicBlock::iterator - MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - const MachineInstr *MI = &*MII; - if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { + for (const MachineInstr &MI : *MBB) { + if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) { LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:"; - MI->dump();); + MI.dump();); return true; } } @@ -1034,8 +1032,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, bool HexagonHardwareLoops::isDead(const MachineInstr *MI, SmallVectorImpl<MachineInstr *> &DeadPhis) const { // Examine each operand. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; @@ -1089,8 +1086,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { // It is possible that some DBG_VALUE instructions refer to this // instruction. Examine each def operand for such references; // if found, mark the DBG_VALUE as undef (but don't delete it). - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); @@ -1123,7 +1119,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, bool &RecL0used, bool &RecL1used) { - // This is just for sanity. + // This is just to confirm basic correctness. 
assert(L->getHeader() && "Loop without a header?"); bool Changed = false; @@ -1877,8 +1873,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false)) return nullptr; - for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - MachineBasicBlock *PB = *I; + for (MachineBasicBlock *PB : Preds) { bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false); if (NotAnalyzed) return nullptr; @@ -1960,8 +1955,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( TB = FB = nullptr; - for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - MachineBasicBlock *PB = *I; + for (MachineBasicBlock *PB : Preds) { if (PB != Latch) { Tmp2.clear(); bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false); diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index b50a0e29ecae..ed4874baf7c8 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -1006,7 +1006,7 @@ static void packSegmentMask(ArrayRef<int> Mask, ArrayRef<unsigned> OutSegMap, static bool isPermutation(ArrayRef<int> Mask) { // Check by adding all numbers only works if there is no overflow. - assert(Mask.size() < 0x00007FFF && "Sanity failure"); + assert(Mask.size() < 0x00007FFF && "Overflow failure"); int Sum = 0; for (int Idx : Mask) { if (Idx == -1) @@ -1217,7 +1217,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, } else if (Seg0 == ~1u) { Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1]; } else { - assert(Seg1 == ~1u); // Sanity + assert(Seg1 == ~1u); Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1]; } } @@ -1265,7 +1265,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, } else { // BC or DA: this could be done via valign by SegLen. // Do nothing here, because valign (if possible) will be generated - // later on (make sure the Seg0 values are as expected, for sanity). + // later on (make sure the Seg0 values are as expected). assert(Seg0 == 1 || Seg0 == 3); } } @@ -1414,7 +1414,7 @@ OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) { return OpRef::undef(getSingleVT(MVT::i8)); unsigned HalfLen = HwLen / 2; - assert(isPowerOf2_32(HalfLen)); // Sanity. + assert(isPowerOf2_32(HalfLen)); // Handle special case where the output is the same half of the input // repeated twice, i.e. if Va = AB, then handle the output of AA or BB. diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 29572e3106d1..88effed9f076 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -442,8 +442,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, IsVarArg, IsStructRet, StructAttrFlag, Outs, OutVals, Ins, DAG); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (const CCValAssign &VA : ArgLocs) { if (VA.isMemLoc()) { CLI.IsTailCall = false; break; @@ -2549,7 +2548,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon // without any coprocessors). 
if (ElemWidth == 1) { - assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure"); + assert(VecWidth == VecTy.getVectorNumElements() && + "Vector elements should equal vector width size"); assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2); // Check if this is an extract of the lowest bit. if (IdxN) { @@ -2863,8 +2863,7 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, Scale /= 2; } - // Another sanity check. At this point there should only be two words - // left, and Scale should be 2. + // At this point there should only be two words left, and Scale should be 2. assert(Scale == 2 && Words[IdxW].size() == 2); SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 8900fca8bb78..f7237f496aee 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -9,6 +9,7 @@ #include "HexagonISelLowering.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/Support/CommandLine.h" @@ -1846,16 +1847,18 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = MemN->getChain(); SDValue Base0 = MemN->getBasePtr(); SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl); + unsigned MemOpc = MemN->getOpcode(); MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; if (MachineMemOperand *MMO = MemN->getMemOperand()) { MachineFunction &MF = DAG.getMachineFunction(); - MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); - MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); + uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE) + ? 
(uint64_t)MemoryLocation::UnknownSize + : HwLen; + MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize); + MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize); } - unsigned MemOpc = MemN->getOpcode(); - if (MemOpc == ISD::LOAD) { assert(cast<LoadSDNode>(Op)->isUnindexed()); SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 76220eff4d51..b6984d40f78e 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -193,9 +193,7 @@ static inline void parseOperands(const MachineInstr &MI, Defs.clear(); Uses.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -1644,8 +1642,7 @@ bool HexagonInstrInfo::ClobbersPredicate(MachineInstr &MI, bool SkipDead) const { const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); - for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) { - MachineOperand MO = MI.getOperand(oper); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { if (!MO.isDef()) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp index 9507de95231f..987c4a5fa6c4 100644 --- a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -109,8 +109,7 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) && "MCI opcode should have been set on construction"); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCO; bool MustExtend = MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended; diff --git a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index fc31139e13ce..1ff248200572 100644 --- a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -120,16 +120,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (DisableHexagonPeephole) return false; // Loop over all of the basic blocks. - for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock *MBB = &*MBBb; + for (MachineBasicBlock &MBB : MF) { PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); // Traverse the basic block. 
- for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) { - NextI = std::next(I); - MachineInstr &MI = *I; + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Look for sign extends: // %170 = SXTW %166 if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) { @@ -274,11 +270,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (NewOp) { Register PSrc = MI.getOperand(PR).getReg(); if (unsigned POrig = PeepholeMap.lookup(PSrc)) { - BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), - QII->get(NewOp), MI.getOperand(0).getReg()) - .addReg(POrig) - .add(MI.getOperand(S2)) - .add(MI.getOperand(S1)); + BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), QII->get(NewOp), + MI.getOperand(0).getReg()) + .addReg(POrig) + .add(MI.getOperand(S2)) + .add(MI.getOperand(S1)); MRI->clearKillFlags(POrig); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp index 93ba277b0c9d..2c5c64cfcfc6 100644 --- a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -400,8 +400,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned Acc = 0; // Value accumulator. unsigned Shift = 0; - for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { - MachineInstr *MI = *I; + for (MachineInstr *MI : OG) { const MachineMemOperand &MMO = getStoreTarget(MI); MachineOperand &SO = MI->getOperand(2); // Source. assert(SO.isImm() && "Expecting an immediate operand"); diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 87b1c43961d7..ecb2f88d8096 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -305,8 +305,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg(); LastVRegUse.erase(MI->getOperand(1).getReg()); } else { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; if (MO.isUse() && !MI->isCopy() && diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp index 897fb209a8bf..ea2798a3b44e 100644 --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -749,7 +749,6 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool { WithMaxAlign.ValTy, Adjust); int Diff = Start - (OffAtMax + Adjust); AlignVal = HVC.getConstInt(Diff); - // Sanity. 
assert(Diff >= 0); assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value()); } else { diff --git a/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp index b9e577d201f9..cafe93bf8f4b 100644 --- a/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp +++ b/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp @@ -51,9 +51,8 @@ struct Filler : public MachineFunctionPass { TRI = Subtarget.getRegisterInfo(); bool Changed = false; - for (MachineFunction::iterator FI = MF.begin(), FE = MF.end(); FI != FE; - ++FI) - Changed |= runOnMachineBasicBlock(*FI); + for (MachineBasicBlock &MBB : MF) + Changed |= runOnMachineBasicBlock(MBB); return Changed; } @@ -200,8 +199,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::instr_iterator MI, bool &SawLoad, assert((!MI->isCall() && !MI->isReturn()) && "Cannot put calls or returns in delay slot."); - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); + for (const MachineOperand &MO : MI->operands()) { unsigned Reg; if (!MO.isReg() || !(Reg = MO.getReg())) diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp index 3a2d5030775e..3644eafe4353 100644 --- a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp +++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp @@ -65,17 +65,14 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const { *static_cast<const LanaiInstrInfo *>(STI.getInstrInfo()); unsigned MaxCallFrameSize = MF.getFrameInfo().getMaxCallFrameSize(); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; - ++MBB) { - MachineBasicBlock::iterator MBBI = MBB->begin(); - while (MBBI != MBB->end()) { - MachineInstr &MI = *MBBI++; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (MI.getOpcode() == Lanai::ADJDYNALLOC) { DebugLoc DL = MI.getDebugLoc(); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); - BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst) + BuildMI(MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst) .addReg(Src) .addImm(MaxCallFrameSize); MI.eraseFromParent(); diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp index 21d035c7ee9c..4217b8509676 100644 --- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp +++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp @@ -467,8 +467,7 @@ static MachineInstr *canFoldIntoSelect(Register Reg, return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading SR. - for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) { // Reject frame index operands. 
if (MO.isFI() || MO.isCPI() || MO.isJTI()) return nullptr; diff --git a/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp b/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp index 743f4f7c6e2f..479c0b1f0358 100644 --- a/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp +++ b/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp @@ -93,9 +93,7 @@ MCOperand LanaiMCInstLower::LowerSymbolOperand(const MachineOperand &MO, void LanaiMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); - + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; switch (MO.getType()) { case MachineOperand::MO_Register: diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp index a83a5d2dfcc9..2a77a150f9aa 100644 --- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -211,8 +211,8 @@ bool MSP430FrameLowering::restoreCalleeSavedRegisters( MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) - BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg()); + for (const CalleeSavedInfo &I : CSI) + BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), I.getReg()); return true; } diff --git a/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp index 1e57f33386e6..52c037de7660 100644 --- a/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -115,9 +115,7 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; switch (MO.getType()) { default: diff --git a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp index fefa1134b021..622f2039f9e4 100644 --- a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp @@ -72,10 +72,9 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, if (!CSI.empty()) { const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); + for (const CalleeSavedInfo &I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); unsigned DReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DReg, Offset)); @@ -119,13 +118,13 @@ bool Mips16FrameLowering::spillCalleeSavedRegisters( // will be saved with the "save" instruction // during emitPrologue // - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (const CalleeSavedInfo &I : CSI) { // Add the callee-saved register as live-in. Do not add if the register is // RA and return address is taken, because it has already been added in // method MipsTargetLowering::lowerRETURNADDR. // It's killed at the spill, unless the register is RA and return address // is taken. 
- unsigned Reg = CSI[i].getReg(); + unsigned Reg = I.getReg(); bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA) && MF->getFrameInfo().isReturnAddressTaken(); if (!IsRAAndRetAddrIsTaken) diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 6d3f3adb2b7a..5d026785b921 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -163,9 +163,8 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI, TargetMachine &TM, MCStreamer &OutStreamer, const MipsSubtarget &Subtarget) { - for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands(); - I < E; ++I) { - MachineOperand MO = MI.getOperand(I); + for (const MachineOperand &MO : + llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands())) { if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) { MCSymbol *Callee = MO.getMCSymbol(); if (Callee && !Callee->getName().empty()) { diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index 8e619549f01c..491d379bfe0b 100644 --- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -637,8 +637,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(&*I); + for (MachineBasicBlock &MBB : *MF) + computeBlockSize(&MBB); // Compute block offsets. adjustBBOffsetsAfter(&MF->front()); @@ -730,8 +730,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { continue; // Scan the instructions for constant pool operands. - for (unsigned op = 0, e = MI.getNumOperands(); op != e; ++op) - if (MI.getOperand(op).isCPI()) { + for (const MachineOperand &MO : MI.operands()) + if (MO.isCPI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -759,7 +759,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { break; } // Remember that this is a user of a CP entry. - unsigned CPI = MI.getOperand(op).getIndex(); + unsigned CPI = MO.getIndex(); MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; unsigned LongFormMaxOffs = ((1 << LongFormBits)-1) * LongFormScale; @@ -1066,9 +1066,9 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; // Change the CPI in the instruction operand to refer to the clone. - for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) - if (UserMI->getOperand(j).isCPI()) { - UserMI->getOperand(j).setIndex(CPEs[i].CPI); + for (MachineOperand &MO : UserMI->operands()) + if (MO.isCPI()) { + MO.setIndex(CPEs[i].CPI); break; } // Adjust the refcount of the clone... @@ -1122,9 +1122,9 @@ int MipsConstantIslands::findLongFormInRangeCPEntry // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; // Change the CPI in the instruction operand to refer to the clone. - for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) - if (UserMI->getOperand(j).isCPI()) { - UserMI->getOperand(j).setIndex(CPEs[i].CPI); + for (MachineOperand &MO : UserMI->operands()) + if (MO.isCPI()) { + MO.setIndex(CPEs[i].CPI); break; } // Adjust the refcount of the clone... 
@@ -1392,9 +1392,9 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { adjustBBOffsetsAfter(&*--NewIsland->getIterator()); // Finally, change the CPI in the instruction operand to be ID. - for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) - if (UserMI->getOperand(i).isCPI()) { - UserMI->getOperand(i).setIndex(ID); + for (MachineOperand &MO : UserMI->operands()) + if (MO.isCPI()) { + MO.setIndex(ID); break; } @@ -1633,10 +1633,10 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { void MipsConstantIslands::prescanForConstants() { unsigned J = 0; (void)J; - for (MachineFunction::iterator B = - MF->begin(), E = MF->end(); B != E; ++B) { - for (MachineBasicBlock::instr_iterator I = - B->instr_begin(), EB = B->instr_end(); I != EB; ++I) { + for (MachineBasicBlock &B : *MF) { + for (MachineBasicBlock::instr_iterator I = B.instr_begin(), + EB = B.instr_end(); + I != EB; ++I) { switch(I->getDesc().getOpcode()) { case Mips::LwConstant32: { PrescannedForConstants = true; diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index c2e3d7393a6d..2d27d7553de6 100644 --- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -218,9 +218,8 @@ namespace { bool runOnMachineFunction(MachineFunction &F) override { TM = &F.getTarget(); bool Changed = false; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - Changed |= runOnMachineBasicBlock(*FI); + for (MachineBasicBlock &MBB : F) + Changed |= runOnMachineBasicBlock(MBB); // This pass invalidates liveness information when it reorders // instructions to fill delay slot. Without this, -verify-machineinstrs diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp index f72dc1da4131..31180d5a23ef 100644 --- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp @@ -896,9 +896,8 @@ bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = STI->getInstrInfo(); bool Modified = false; - for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; - ++MFI) - Modified |= expandMBB(*MFI); + for (MachineBasicBlock &MBB : MF) + Modified |= expandMBB(MBB); if (Modified) MF.RenumberBlocks(); diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.h b/llvm/lib/Target/Mips/MipsFrameLowering.h index 612b2b712fa8..710a3d40c38e 100644 --- a/llvm/lib/Target/Mips/MipsFrameLowering.h +++ b/llvm/lib/Target/Mips/MipsFrameLowering.h @@ -34,7 +34,10 @@ public: bool hasBP(const MachineFunction &MF) const; - bool isFPCloseToIncomingSP() const override { return false; } + bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const override { + return false; + } bool enableShrinkWrapping(const MachineFunction &MF) const override { return true; diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/llvm/lib/Target/Mips/MipsMCInstLower.cpp index 66e04bda2af3..7b58cb90ab87 100644 --- a/llvm/lib/Target/Mips/MipsMCInstLower.cpp +++ b/llvm/lib/Target/Mips/MipsMCInstLower.cpp @@ -318,8 +318,7 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp = LowerOperand(MO); if (MCOp.isValid()) diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp 
b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index bb4b9c6fa6a7..193d071447ff 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -452,10 +452,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, // Iterate over list of callee-saved registers and emit .cfi_offset // directives. - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); + for (const CalleeSavedInfo &I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. @@ -796,13 +795,13 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters( MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *STI.getInstrInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (const CalleeSavedInfo &I : CSI) { // Add the callee-saved register as live-in. Do not add if the register is // RA and return address is taken, because it has already been added in // method MipsTargetLowering::lowerRETURNADDR. // It's killed at the spill, unless the register is RA and return address // is taken. - unsigned Reg = CSI[i].getReg(); + unsigned Reg = I.getReg(); bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) && MF->getFrameInfo().isReturnAddressTaken(); if (!IsRAAndRetAddrIsTaken) @@ -831,8 +830,7 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, - CSI[i].getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI); } return true; diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 1fe6ab09804b..40b215a8204c 100644 --- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -3581,8 +3581,8 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? 
Mips::LH : Mips::LH64), Rt); - for (unsigned i = 1; i < MI.getNumOperands(); i++) - MIB.add(MI.getOperand(i)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + MIB.add(MO); if(!UsingMips32) { Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index aab6d2034f11..c35e67d6726f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -130,10 +130,8 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV, for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), - E = Others.end(); - I != E; ++I) - VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + for (const GlobalVariable *GV : Others) + VisitGlobalVariableForEmission(GV, Order, Visited, Visiting); // Now we can visit ourself Order.push_back(GV); @@ -699,35 +697,33 @@ static bool useFuncSeen(const Constant *C, void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { DenseMap<const Function *, bool> seenMap; - for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { - const Function *F = &*FI; - - if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) { - emitDeclaration(F, O); + for (const Function &F : M) { + if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) { + emitDeclaration(&F, O); continue; } - if (F->isDeclaration()) { - if (F->use_empty()) + if (F.isDeclaration()) { + if (F.use_empty()) continue; - if (F->getIntrinsicID()) + if (F.getIntrinsicID()) continue; - emitDeclaration(F, O); + emitDeclaration(&F, O); continue; } - for (const User *U : F->users()) { + for (const User *U : F.users()) { if (const Constant *C = dyn_cast<Constant>(U)) { if (usedInGlobalVarDef(C)) { // The use is in the initialization of a global variable // that is a function pointer, so print a declaration // for the original function - emitDeclaration(F, O); + emitDeclaration(&F, O); break; } // Emit a declaration of this function if the function that // uses this constant expr has already been seen. if (useFuncSeen(C, seenMap)) { - emitDeclaration(F, O); + emitDeclaration(&F, O); break; } } @@ -746,11 +742,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { // appearing in the module before the callee. so print out // a declaration for the callee. if (seenMap.find(caller) != seenMap.end()) { - emitDeclaration(F, O); + emitDeclaration(&F, O); break; } } - seenMap[F] = true; + seenMap[&F] = true; } } @@ -887,33 +883,11 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { GlobalsEmitted = true; } - // XXX Temproarily remove global variables so that doFinalization() will not - // emit them again (global variables are emitted at beginning). 
- - Module::GlobalListType &global_list = M.getGlobalList(); - int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable *[n]; - - // first, back-up GlobalVariable in gv_array - i = 0; - for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) - gv_array[i++] = &*I; - - // second, empty global_list - while (!global_list.empty()) - global_list.remove(global_list.begin()); - // call doFinalization bool ret = AsmPrinter::doFinalization(M); - // now we restore global variables - for (i = 0; i < n; i++) - global_list.insert(global_list.end(), gv_array[i]); - clearAnnotationCache(&M); - delete[] gv_array; // Close the last emitted section if (HasDebugInfo) { static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 5d680e731e4a..2a3a38d7b2f1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -306,6 +306,11 @@ public: std::string getVirtualRegisterName(unsigned) const; const MCSymbol *getFunctionFrameSymbol() const override; + + // Make emitGlobalVariable() no-op for NVPTX. + // Global variables have been already emitted by the time the base AsmPrinter + // attempts to do so in doFinalization() (see NVPTXAsmPrinter::emitGlobals()). + void emitGlobalVariable(const GlobalVariable *GV) override {} }; } // end namespace llvm diff --git a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp index a8a43cee9ab7..34b9dfe87cc2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp @@ -72,8 +72,7 @@ bool NVPTXAssignValidGlobalNames::runOnModule(Module &M) { std::string NVPTXAssignValidGlobalNames::cleanUpName(StringRef Name) { std::string ValidName; raw_string_ostream ValidNameStream(ValidName); - for (unsigned I = 0, E = Name.size(); I != E; ++I) { - char C = Name[I]; + for (char C : Name) { if (C == '.' || C == '@') { ValidNameStream << "_$_"; } else { diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index e404cead344b..f4934f0bc20b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -56,23 +56,16 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; InstrsToRemove.clear(); - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; - ++BI) { - for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); - I != E; ++I) { - MachineInstr &MI = *I; + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) Changed |= processInstr(MI); - } - } // Now clean up any handle-access instructions // This is needed in debug mode when code cleanup passes are not executed, // but we need the handle access to be eliminated because they are not // valid instructions when image handles are disabled. 
- for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(), - E = InstrsToRemove.end(); I != E; ++I) { - (*I)->eraseFromParent(); - } + for (MachineInstr *MI : InstrsToRemove) + MI->eraseFromParent(); return Changed; } diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td index f43ba00ec373..f3ae0010ad8e 100644 --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -626,7 +626,9 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read], // 5 Cycles Fixed-Point and BCD operations, 3 input operands def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read], (instrs + BCDADD_rec, BCDS_rec, + BCDSUB_rec, BCDTRUNC_rec, VADDECUQ, VADDEUQM, @@ -1974,7 +1976,7 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY], ICBLQ, ICBTLS, ICCCI, - LA, + LA, LA8, LDMX, MFDCR, MFPMR, @@ -2073,3 +2075,4 @@ def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read] VMSUMUHM, VMSUMUHS )>; + diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index c4f4a2b3d796..f7c049951c54 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -151,6 +151,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], (instregex "ADD(4|8)(TLS)?(_)?$"), (instregex "NEG(8)?(O)?$"), (instregex "ADDI(S)?toc(HA|L)(8)?$"), + (instregex "LA(8)?$"), COPY, MCRF, MCRXRX, @@ -165,7 +166,6 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C], SRADI_32, RLDIC, RFEBB, - LA, TBEGIN, TRECHKPT, NOP, @@ -624,7 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C], BCDS_rec, BCDTRUNC_rec, BCDUS_rec, - BCDUTRUNC_rec + BCDUTRUNC_rec, + BCDADD_rec, + BCDSUB_rec )>; // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index a1ff20bb3612..422bd11dca52 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -203,6 +203,22 @@ def FeatureLogicalFusion : SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true", "Target supports Logical Operations fusion", [FeatureFusion]>; +def FeatureSha3Fusion : + SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true", + "Target supports SHA3 assist fusion", + [FeatureFusion]>; +def FeatureCompareFusion: + SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true", + "Target supports Comparison Operations fusion", + [FeatureFusion]>; +def FeatureWideImmFusion: + SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true", + "Target supports Wide-Immediate fusion", + [FeatureFusion]>; +def FeatureZeroMoveFusion: + SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true", + "Target supports move to SPR with branch fusion", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -393,7 +409,7 @@ def ProcessorFeatures { // still exist with the exception of those we know are Power9 specific. 
list<SubtargetFeature> FusionFeatures = [ FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion, - FeatureLogicalFusion, FeatureArithAddFusion + FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion, ]; list<SubtargetFeature> P10AdditionalFeatures = !listconcat(FusionFeatures, [ diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index a76963abb8e4..16e3b2b85c2e 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -875,18 +875,19 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } - case PPC::ADDItoc: { + case PPC::ADDItoc: + case PPC::ADDItoc8: { assert(IsAIX && TM.getCodeModel() == CodeModel::Small && - "Operand only valid in AIX 32 bit mode"); + "PseudoOp only valid for small code model AIX"); - // Transform %rN = ADDItoc @op1, %r2. + // Transform %rN = ADDItoc/8 @op1, %r2. LowerPPCMachineInstrToMCInst(MI, TmpInst, *this); // Change the opcode to load address. - TmpInst.setOpcode(PPC::LA); + TmpInst.setOpcode((!IsPPC64) ? (PPC::LA) : (PPC::LA8)); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal() && "Invalid operand for ADDItoc."); + assert(MO.isGlobal() && "Invalid operand for ADDItoc[8]."); // Map the operand to its corresponding MCSymbol. const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); diff --git a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index fa6713dcca80..4cac0e3551f6 100644 --- a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -120,16 +120,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo()); unsigned FuncSize = GetInitialOffset(Fn); - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = &*MFI; - + for (MachineBasicBlock &MBB : Fn) { // The end of the previous block may have extra nops if this block has an // alignment requirement. - if (MBB->getNumber() > 0) { - unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize); + if (MBB.getNumber() > 0) { + unsigned AlignExtra = GetAlignmentAdjustment(MBB, FuncSize); - auto &BS = BlockSizes[MBB->getNumber()-1]; + auto &BS = BlockSizes[MBB.getNumber()-1]; BS.first += AlignExtra; BS.second = AlignExtra; @@ -138,10 +135,10 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { unsigned BlockSize = 0; unsigned UnalignedBytesRemaining = 0; - for (MachineInstr &MI : *MBB) { + for (MachineInstr &MI : MBB) { unsigned MINumBytes = TII->getInstSizeInBytes(MI); if (MI.isInlineAsm() && (FirstImpreciseBlock < 0)) - FirstImpreciseBlock = MBB->getNumber(); + FirstImpreciseBlock = MBB.getNumber(); if (TII->isPrefixed(MI.getOpcode())) { NumPrefixed++; @@ -171,7 +168,7 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { BlockSize += MINumBytes; } - BlockSizes[MBB->getNumber()].first = BlockSize; + BlockSizes[MBB.getNumber()].first = BlockSize; FuncSize += BlockSize; } @@ -181,16 +178,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) { /// Modify the basic block align adjustment. 
void PPCBSel::modifyAdjustment(MachineFunction &Fn) { unsigned Offset = GetInitialOffset(Fn); - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = &*MFI; - - if (MBB->getNumber() > 0) { - auto &BS = BlockSizes[MBB->getNumber()-1]; + for (MachineBasicBlock &MBB : Fn) { + if (MBB.getNumber() > 0) { + auto &BS = BlockSizes[MBB.getNumber()-1]; BS.first -= BS.second; Offset -= BS.second; - unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset); + unsigned AlignExtra = GetAlignmentAdjustment(MBB, Offset); BS.first += AlignExtra; BS.second = AlignExtra; @@ -198,7 +192,7 @@ void PPCBSel::modifyAdjustment(MachineFunction &Fn) { Offset += AlignExtra; } - Offset += BlockSizes[MBB->getNumber()].first; + Offset += BlockSizes[MBB.getNumber()].first; } } diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index b9518d6d7064..b1f5bdd885cd 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -81,8 +81,7 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { } static bool clobbersCTR(const MachineInstr &MI) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) return true; @@ -167,18 +166,16 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before // any other instructions that might clobber the ctr register. - for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); - I != IE; ++I) { - MachineBasicBlock *MBB = &*I; - if (!MDT->isReachableFromEntry(MBB)) + for (MachineBasicBlock &MBB : MF) { + if (!MDT->isReachableFromEntry(&MBB)) continue; - for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(), - MIIE = MBB->end(); MII != MIIE; ++MII) { + for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(), + MIIE = MBB.end(); MII != MIIE; ++MII) { unsigned Opc = MII->getOpcode(); if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ || Opc == PPC::BDZ8 || Opc == PPC::BDZ) - if (!verifyCTRBranch(MBB, MII)) + if (!verifyCTRBranch(&MBB, MII)) llvm_unreachable("Invalid PPC CTR loop!"); } } diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp index be4c9dd60b00..a9794ddd0566 100644 --- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp @@ -74,8 +74,7 @@ bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); TRI = &TII->getRegisterInfo(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock &MBB = *I; + for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE;) { MachineInstr &MI = *MBBI; diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index fc3c7ec35b8d..3ca563fee970 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -391,9 +391,8 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; unsigned BP8Reg = HasBP ? 
(unsigned) PPC::X30 : FP8Reg; - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) - for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + for (MachineBasicBlock &MBB : MF) + for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) { --MBBI; for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { MachineOperand &MO = MBBI->getOperand(I); @@ -1172,8 +1171,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // Describe where callee saved registers were saved, at fixed offsets from // CFA. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just @@ -1204,15 +1203,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, continue; } - if (CSI[I].isSpilledToReg()) { - unsigned SpilledReg = CSI[I].getDstReg(); + if (I.isSpilledToReg()) { + unsigned SpilledReg = I.getDstReg(); unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( nullptr, MRI->getDwarfRegNum(Reg, true), MRI->getDwarfRegNum(SpilledReg, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIRegister); } else { - int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); // We have changed the object offset above but we do not want to change // the actual offsets in the CFI instruction so we have to undo the // offset change here. @@ -2085,15 +2084,15 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, SmallVector<CalleeSavedInfo, 18> FPRegs; SmallVector<CalleeSavedInfo, 18> VRegs; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || (Reg != PPC::X2 && Reg != PPC::R2)) && "Not expecting to try to spill R2 in a function that must save TOC"); if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; - GPRegs.push_back(CSI[i]); + GPRegs.push_back(I); if (Reg < MinGPR) { MinGPR = Reg; @@ -2101,7 +2100,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } else if (PPC::G8RCRegClass.contains(Reg)) { HasG8SaveArea = true; - G8Regs.push_back(CSI[i]); + G8Regs.push_back(I); if (Reg < MinG8R) { MinG8R = Reg; @@ -2109,7 +2108,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } else if (PPC::F8RCRegClass.contains(Reg)) { HasFPSaveArea = true; - FPRegs.push_back(CSI[i]); + FPRegs.push_back(I); if (Reg < MinFPR) { MinFPR = Reg; @@ -2123,7 +2122,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // alignment requirements, so overload the save area for both cases. HasVRSaveArea = true; - VRegs.push_back(CSI[i]); + VRegs.push_back(I); if (Reg < MinVR) { MinVR = Reg; @@ -2395,8 +2394,8 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( } }); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); // CR2 through CR4 are the nonvolatile CR fields. 
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; @@ -2439,11 +2438,11 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) .addReg(PPC::R12, getKillRegState(true)), - CSI[i].getFrameIdx())); + I.getFrameIdx())); } } else { - if (CSI[i].isSpilledToReg()) { - unsigned Dst = CSI[i].getDstReg(); + if (I.isSpilledToReg()) { + unsigned Dst = I.getDstReg(); if (Spilled[Dst]) continue; @@ -2478,9 +2477,9 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( if (Subtarget.needsSwapsForVSXMemOps() && !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, - CSI[i].getFrameIdx(), RC, TRI); + I.getFrameIdx(), RC, TRI); else - TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), + TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC, TRI); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0abdf81d0908..a2664bcff4ab 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -510,14 +510,12 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { return false; // TODO: These asserts should be updated as more support for the toc data - // transformation is added (64 bit, struct support, etc.). + // transformation is added (struct support, etc.). - assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by " - "the toc data transformation."); - - assert(PointerSize >= GV->getAlign().valueOrOne().value() && - "GlobalVariables with an alignment requirement stricter then 4-bytes " - "not supported by the toc data transformation."); + assert( + PointerSize >= GV->getAlign().valueOrOne().value() && + "GlobalVariables with an alignment requirement stricter than TOC entry " + "size not supported by the toc data transformation."); Type *GVType = GV->getValueType(); @@ -537,7 +535,7 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) { "supported by the toc data transformation."); assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 && - "A GlobalVariable with size larger than 32 bits is not currently " + "A GlobalVariable with size larger than a TOC entry is not currently " "supported by the toc data transformation."); if (GV->hasLocalLinkage() || GV->hasPrivateLinkage()) @@ -5049,16 +5047,94 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // value for the comparison. When selecting through a .td file, a type // error is raised. Must check this first so we never break on the // !Subtarget->isISA3_1() check. - if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) { + auto IntID = N->getConstantOperandVal(0); + if (IntID == Intrinsic::ppc_fsels) { SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)}; CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops); return; } + if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) { + auto Pred = N->getConstantOperandVal(1); + unsigned Opcode = + IntID == Intrinsic::ppc_bcdadd_p ? 
PPC::BCDADD_rec : PPC::BCDSUB_rec; + unsigned SubReg = 0; + unsigned ShiftVal = 0; + bool Reverse = false; + switch (Pred) { + case 0: + SubReg = PPC::sub_eq; + ShiftVal = 1; + break; + case 1: + SubReg = PPC::sub_eq; + ShiftVal = 1; + Reverse = true; + break; + case 2: + SubReg = PPC::sub_lt; + ShiftVal = 3; + break; + case 3: + SubReg = PPC::sub_lt; + ShiftVal = 3; + Reverse = true; + break; + case 4: + SubReg = PPC::sub_gt; + ShiftVal = 2; + break; + case 5: + SubReg = PPC::sub_gt; + ShiftVal = 2; + Reverse = true; + break; + case 6: + SubReg = PPC::sub_un; + break; + case 7: + SubReg = PPC::sub_un; + Reverse = true; + break; + } + + EVT VTs[] = {MVT::v16i8, MVT::Glue}; + SDValue Ops[] = {N->getOperand(2), N->getOperand(3), + CurDAG->getTargetConstant(0, dl, MVT::i32)}; + SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0); + SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); + // On Power10, we can use SETBC[R]. On prior architectures, we have to use + // MFOCRF and shift/negate the value. + if (Subtarget->isISA3_1()) { + SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32); + SDValue CRBit = SDValue( + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, + CR6Reg, SubRegIdx, BCDOp.getValue(1)), + 0); + CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32, + CRBit); + } else { + SDValue Move = + SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg, + BCDOp.getValue(1)), + 0); + SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl), + getI32Imm(31, dl), getI32Imm(31, dl)}; + if (!Reverse) + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + else { + SDValue Shift = SDValue( + CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); + CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl)); + } + } + return; + } + if (!Subtarget->isISA3_1()) break; unsigned Opcode = 0; - switch (N->getConstantOperandVal(0)) { + switch (IntID) { default: break; case Intrinsic::ppc_altivec_vstribr_p: @@ -5713,41 +5789,57 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (isAIXABI && CModel == CodeModel::Medium) report_fatal_error("Medium code model is not supported on AIX."); - // For 64-bit small code model, we allow SelectCodeCommon to handle this, - // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. - if (isPPC64 && CModel == CodeModel::Small) + // For 64-bit ELF small code model, we allow SelectCodeCommon to handle + // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX + // small code model, we need to check for a toc-data attribute. + if (isPPC64 && !isAIXABI && CModel == CodeModel::Small) break; + auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry, + EVT OperandTy) { + SDValue GA = TocEntry->getOperand(0); + SDValue TocBase = TocEntry->getOperand(1); + SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase); + transferMemOperands(TocEntry, MN); + ReplaceNode(TocEntry, MN); + }; + // Handle 32-bit small code model. 
- if (!isPPC64) { + if (!isPPC64 && CModel == CodeModel::Small) { // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either // PPC::ADDItoc, or PPC::LWZtoc - auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) { - SDValue GA = TocEntry->getOperand(0); - SDValue TocBase = TocEntry->getOperand(1); - SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase); - transferMemOperands(TocEntry, MN); - ReplaceNode(TocEntry, MN); - }; - if (isELFABI) { assert(TM.isPositionIndependent() && "32-bit ELF can only have TOC entries in position independent" " code."); // 32-bit ELF always uses a small code model toc access. - replaceWith(PPC::LWZtoc, N); + replaceWith(PPC::LWZtoc, N, MVT::i32); return; } - if (isAIXABI && CModel == CodeModel::Small) { - if (hasTocDataAttr(N->getOperand(0), - CurDAG->getDataLayout().getPointerSize())) - replaceWith(PPC::ADDItoc, N); - else - replaceWith(PPC::LWZtoc, N); + assert(isAIXABI && "ELF ABI already handled"); + if (hasTocDataAttr(N->getOperand(0), + CurDAG->getDataLayout().getPointerSize())) { + replaceWith(PPC::ADDItoc, N, MVT::i32); return; } + + replaceWith(PPC::LWZtoc, N, MVT::i32); + return; + } + + if (isPPC64 && CModel == CodeModel::Small) { + assert(isAIXABI && "ELF ABI handled in common SelectCode"); + + if (hasTocDataAttr(N->getOperand(0), + CurDAG->getDataLayout().getPointerSize())) { + replaceWith(PPC::ADDItoc8, N, MVT::i64); + return; + } + // Break if it doesn't have toc data attribute. Proceed with common + // SelectCode. + break; } assert(CModel != CodeModel::Small && "All small code models handled."); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ac952b240a48..ec7e30d7e362 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12116,6 +12116,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); + MachineRegisterInfo &MRI = F->getRegInfo(); if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || @@ -12721,7 +12722,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. - BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); + if (MRI.use_empty(OldFPSCRReg)) + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); + else + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); // The floating point rounding mode is in the bits 62:63 of FPCSR, and has // the following settings: @@ -12854,7 +12858,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Result of setflm is previous FPSCR content, so we need to save it first. Register OldFPSCRReg = MI.getOperand(0).getReg(); - BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); + if (MRI.use_empty(OldFPSCRReg)) + BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); + else + BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); // Put bits in 32:63 to FPSCR. Register NewFPSCRReg = MI.getOperand(1).getReg(); @@ -15966,8 +15973,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; case 'v': - if (Subtarget.hasAltivec()) + if (Subtarget.hasAltivec() && VT.isVector()) return std::make_pair(0U, &PPC::VRRCRegClass); + else if (Subtarget.hasVSX()) + // Scalars in Altivec registers only make sense with VSX. 
+ return std::make_pair(0U, &PPC::VFRCRegClass); break; case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); @@ -17664,6 +17674,24 @@ PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp, return Mode; } +bool PPCTargetLowering::splitValueIntoRegisterParts( + SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { + EVT ValVT = Val.getValueType(); + // If we are splitting a scalar integer into f64 parts (i.e. so they + // can be placed into VFRC registers), we need to zero extend and + // bitcast the values. This will ensure the value is placed into a + // VSR using direct moves or stack operations as needed. + if (PartVT == MVT::f64 && + (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) { + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val); + Parts[0] = Val; + return true; + } + return false; +} + // If we happen to match to an aligned D-Form, check if the Frame Index is // adequately aligned. If it is not, reset the mode to match to X-Form. static void setXFormForUnalignedFI(SDValue N, unsigned Flags, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 34dce2c3172d..87b7f96112ec 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1139,6 +1139,10 @@ namespace llvm { PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; + bool + splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + Optional<CallingConv::ID> CC) const override; /// Structure that collects some common arguments that get passed around /// between the functions for call lowering. 
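For illustration only (none of this code is in the patch, and the helper name asF64Part is made up here), the i32-to-f64 splitting performed by the new splitValueIntoRegisterParts override can be modelled in plain C++: the integer is zero-extended to 64 bits and then reinterpreted rather than converted, so its bits land unchanged in the low half of the f64 part.

// Sketch of the ISD::ZERO_EXTEND + ISD::BITCAST sequence used above.
#include <cstdint>
#include <cstring>

double asF64Part(uint32_t Val) {
  uint64_t Wide = Val;                       // ISD::ZERO_EXTEND i32 -> i64
  double Part;
  std::memcpy(&Part, &Wide, sizeof(Part));   // ISD::BITCAST i64 -> f64 (no conversion)
  return Part;
}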
struct CallFlags { diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 417a6ce7e522..58af8037f59c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -773,6 +773,11 @@ def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), "addis $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; +def LA8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$sym), + "la $rD, $sym($rA)", IIC_IntGeneral, + [(set i64:$rD, (add i64:$rA, + (PPClo tglobaladdr:$sym, 0)))]>; + let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IIC_IntGeneral, @@ -1435,6 +1440,13 @@ def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentr def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", []>, isPPC64; } + +// Local Data Transform +def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), + "#ADDItoc8", + [(set i64:$rD, + (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; + let mayLoad = 1 in def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", []>, isPPC64; diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 1e0e2d88e54b..fe21a164dfab 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1161,6 +1161,22 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))), } // end HasAltivec +// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set. +class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, + (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS), + !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> { + let Defs = [CR6]; +} + +// [PO VRT VRA VRB 1 / XO] +class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern> + : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> { + let Defs = [CR6]; + let PS = 0; +} + def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">; def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">; let Predicates = [HasP8Altivec] in { @@ -1351,6 +1367,13 @@ def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw, v2i64, v4i32>; def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw, v2i64, v4i32>; +def BCDADD_rec : VX_VT5_VA5_VB5_PS1_XO9_o<1, "bcdadd." , []>; +def BCDSUB_rec : VX_VT5_VA5_VB5_PS1_XO9_o<65, "bcdsub." , []>; + +def : Pat<(v16i8 (int_ppc_bcdadd v16i8:$vA, v16i8:$vB, timm:$PS)), + (BCDADD_rec $vA, $vB, $PS)>; +def : Pat<(v16i8 (int_ppc_bcdsub v16i8:$vA, v16i8:$vB, timm:$PS)), + (BCDSUB_rec $vA, $vB, $PS)>; // Shuffle patterns for unary and swapped (LE) vector pack modulo. def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef), @@ -1598,22 +1621,6 @@ def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>; def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>; -// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set. 
-class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern> - : VX_RD5_RSp5_PS1_XO9<xo, - (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS), - !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> { - let Defs = [CR6]; -} - -// [PO VRT VRA VRB 1 / XO] -class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern> - : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> { - let Defs = [CR6]; - let PS = 0; -} - // Decimal Shift/Unsigned-Shift/Shift-and-Round def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>; def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 649a150866b4..a0fd2111de11 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2138,9 +2138,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } static bool MBBDefinesCTR(MachineBasicBlock &MBB) { - for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); - I != IE; ++I) - if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8)) + for (MachineInstr &MI : MBB) + if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8)) return true; return false; } @@ -2331,8 +2330,7 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI, &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; bool Found = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { const TargetRegisterClass *RC = RCs[c]; if (MO.isReg()) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index d2d5ca92ca1c..d92a10c5b208 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2471,6 +2471,7 @@ def DblwdCmp { // [HasVSX, HasP8Vector, IsLittleEndian] // [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] // [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] +// [HasVSX, HasP8Altivec] // [HasVSX, HasDirectMove] // [HasVSX, HasDirectMove, IsBigEndian] // [HasVSX, HasDirectMove, IsLittleEndian] @@ -2500,6 +2501,10 @@ let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a, v16i8:$b, v16i8:$c)), (v16i8 (VPERMXOR $a, $b, $c))>; +let Predicates = [HasVSX, HasP8Altivec] in + def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a, + v16i8:$b, v16i8:$c)), + (v16i8 (VPERMXOR $a, $b, $c))>; let AddedComplexity = 400 in { // Valid for any VSX subtarget, regardless of endianness. 
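As a rough model of the bcdadd./bcdsub. predicate selection added in PPCISelDAGToDAG.cpp earlier in this patch (the helper name extractCR6Bit and the stated bit positions are this note's assumptions, not code from the patch): after MFOCRF the CR6 field sits in bits 7..4 of the GPR, counting the least significant bit as 0, with LT at 7, GT at 6, EQ at 5 and UN at 4. The RLWINM with SH = (32 - (4 + ShiftVal)) & 31 and MB = ME = 31 rotates the wanted bit into the bottom position and masks everything else away, and XORI flips it for the reversed predicates.

#include <cassert>
#include <cstdint>

static unsigned extractCR6Bit(uint32_t CRValue, unsigned ShiftVal, bool Reverse) {
  unsigned SH = (32 - (4 + ShiftVal)) & 31;                      // rlwinm shift amount
  uint32_t Rotated = (CRValue << SH) | (CRValue >> (32 - SH));   // rotate left by SH
  unsigned Bit = Rotated & 1;                                    // keep only the LSB (bit 31 in IBM numbering)
  return Reverse ? (Bit ^ 1) : Bit;                              // xori 1 for reversed predicates
}

int main() {
  uint32_t OnlyEQ = 1u << 5;                                     // CR6.EQ set, everything else clear
  assert(extractCR6Bit(OnlyEQ, /*ShiftVal=*/1, /*Reverse=*/false) == 1); // "equal"
  assert(extractCR6Bit(OnlyEQ, /*ShiftVal=*/3, /*Reverse=*/false) == 0); // "less than" not set
  assert(extractCR6Bit(OnlyEQ, /*ShiftVal=*/1, /*Reverse=*/true) == 0);  // inverted predicate
}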
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 5cc180d770b2..22c5b6c11289 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -152,9 +152,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; - if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP)) + if (LowerPPCMachineOperandToMCOperand(MO, MCOp, AP)) OutMI.addOperand(MCOp); } } diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp index bdff5109c1e1..9d5206f8fd43 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -149,6 +149,79 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd, case FusionFeature::FK_SldiAdd: return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) || (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57)); + + // rldicl rx, ra, 1, 0 - xor + case FusionFeature::FK_RotateLeftXor: + return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0); + + // rldicr rx, ra, 1, 63 - xor + case FusionFeature::FK_RotateRightXor: + return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63); + + // We actually use CMPW* and CMPD*, 'l' doesn't exist as an operand in instr. + + // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 } + // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 } + case FusionFeature::FK_LoadCmp1: + // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 } + // { ld,ldx } - cmpli 0,1,rx,{ 0,1 } + case FusionFeature::FK_LoadCmp2: { + const MachineOperand &BT = SecondMI.getOperand(0); + if (!BT.isReg() || + (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0)) + return false; + if (SecondMI.getOpcode() == PPC::CMPDI && + matchingImmOps(SecondMI, 2, -1, 16)) + return true; + return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1); + } + + // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 } + case FusionFeature::FK_LoadCmp3: { + const MachineOperand &BT = SecondMI.getOperand(0); + if (!BT.isReg() || + (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0)) + return false; + return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) || + matchingImmOps(SecondMI, 2, -1, 16); + } + + // mtctr - { bcctr,bcctrl } + case FusionFeature::FK_ZeroMoveCTR: + // ( mtctr rx ) is alias of ( mtspr 9, rx ) + return (FirstMI.getOpcode() != PPC::MTSPR && + FirstMI.getOpcode() != PPC::MTSPR8) || + matchingImmOps(FirstMI, 0, 9); + + // mtlr - { bclr,bclrl } + case FusionFeature::FK_ZeroMoveLR: + // ( mtlr rx ) is alias of ( mtspr 8, rx ) + return (FirstMI.getOpcode() != PPC::MTSPR && + FirstMI.getOpcode() != PPC::MTSPR8) || + matchingImmOps(FirstMI, 0, 8); + + // addis rx,ra,si - addi rt,rx,SI, SI >= 0 + case FusionFeature::FK_AddisAddi: { + const MachineOperand &RA = FirstMI.getOperand(1); + const MachineOperand &SI = SecondMI.getOperand(2); + if (!SI.isImm() || !RA.isReg()) + return false; + if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) + return false; + return SignExtend64(SI.getImm(), 16) >= 0; + } + + // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2 + case FusionFeature::FK_AddiAddis: { + const MachineOperand &RA = FirstMI.getOperand(1); + const MachineOperand &SI = 
FirstMI.getOperand(2); + if (!SI.isImm() || !RA.isReg()) + return false; + if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) + return false; + int64_t ExtendedSI = SignExtend64(SI.getImm(), 16); + return ExtendedSI >= 2; + } } llvm_unreachable("All the cases should have been handled"); diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def index 469a24800423..e4954b722fd0 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def @@ -78,5 +78,80 @@ FUSION_FEATURE(VecLogical, hasLogicalFusion, -1, FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8)) +// rldicl rx, ra, 1, 0 - xor +FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1, + FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64), + FUSION_OP_SET(XOR, XOR8)) + +// rldicr rx, ra, 1, 63 - xor +FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1, + FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8)) + +// There're two special cases in 'load-compare' series, so we have to split +// them into several pattern groups to fit into current framework. This can +// be clearer once we switched to a more expressive approach. + +// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 } +// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 } +FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1, + FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_, + LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS, + LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8, + LWZXTLS, LWZXTLS_, LWZXTLS_32), + FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI)) + +// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 } +// { ld,ldx } - cmpli 0,1,rx,{ 0,1 } +FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1, + FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_), + FUSION_OP_SET(CMPDI, CMPLDI)) + +// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 } +FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1, + FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX, + LWAX_32), + FUSION_OP_SET(CMPLDI, CMPLWI)) + +// ori - oris +FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8), + FUSION_OP_SET(ORIS, ORIS8)) + +// lis - ori +FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8), + FUSION_OP_SET(ORI, ORI8)) + +// oris - ori +FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8), + FUSION_OP_SET(ORI, ORI8)) + +// xori - xoris +FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8), + FUSION_OP_SET(XORIS, XORIS8)) + +// xoris - xori +FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8), + FUSION_OP_SET(XORI, XORI8)) + +// addis rx,ra,si - addi rt,rx,SI, SI >= 0 +FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1, + FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), + FUSION_OP_SET(ADDI, ADDI8, ADDItocL)) + +// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2 +FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1, + FUSION_OP_SET(ADDI, ADDI8, ADDItocL), + FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8)) + +// mtctr - { bcctr,bcctrl } +FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1, + FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR), + FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn, + BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL)) + +// mtlr - { bclr,bclrl } +FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1, + FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR), + FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL)) + #undef FUSION_FEATURE #undef FUSION_OP_SET diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp 
b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index dfc29dbb10f1..1258a1281597 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -131,6 +131,10 @@ void PPCSubtarget::initializeEnvironment() { HasAddLogicalFusion = false; HasLogicalAddFusion = false; HasLogicalFusion = false; + HasSha3Fusion = false; + HasCompareFusion = false; + HasWideImmFusion = false; + HasZeroMoveFusion = false; IsISA2_06 = false; IsISA2_07 = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 783ea121ccb8..d52833cb1465 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -151,6 +151,10 @@ protected: bool HasAddLogicalFusion; bool HasLogicalAddFusion; bool HasLogicalFusion; + bool HasSha3Fusion; + bool HasCompareFusion; + bool HasWideImmFusion; + bool HasZeroMoveFusion; bool IsISA2_06; bool IsISA2_07; bool IsISA3_0; @@ -340,6 +344,10 @@ public: bool hasAddLogicalFusion() const { return HasAddLogicalFusion; } bool hasLogicalAddFusion() const { return HasLogicalAddFusion; } bool hasLogicalFusion() const { return HasLogicalFusion; } + bool hasCompareFusion() const { return HasCompareFusion; } + bool hasWideImmFusion() const { return HasWideImmFusion; } + bool hasSha3Fusion() const { return HasSha3Fusion; } + bool hasZeroMoveFusion() const { return HasZeroMoveFusion; } bool needsSwapsForVSXMemOps() const { return hasVSX() && isLittleEndian() && !hasP9Vector(); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 77d5a2668b60..5d6f58a77a39 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -318,9 +318,20 @@ InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } +// Check if the current Type is an MMA vector type. Valid MMA types are +// v256i1 and v512i1 respectively. +static bool isMMAType(Type *Ty) { + return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) && + (Ty->getPrimitiveSizeInBits() > 128); +} + InstructionCost PPCTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind) { + // Set the max cost if an MMA type is present (v256i1, v512i1). + if (isMMAType(U->getType())) + return InstructionCost::getMax(); + // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U)) @@ -942,32 +953,39 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 2; } -// Adjust the cost of vector instructions on targets which there is overlap -// between the vector and scalar units, thereby reducing the overall throughput -// of vector code wrt. scalar code. -InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost, - unsigned Opcode, Type *Ty1, - Type *Ty2) { +// Returns a cost adjustment factor to adjust the cost of vector instructions +// on targets which there is overlap between the vector and scalar units, +// thereby reducing the overall throughput of vector code wrt. scalar code. +// An invalid instruction cost is returned if the type is an MMA vector type. +InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode, + Type *Ty1, Type *Ty2) { + // If the vector type is of an MMA type (v256i1, v512i1), an invalid + // instruction cost is returned. 
This is to signify to other cost computing + // functions to return the maximum instruction cost in order to prevent any + // opportunities for the optimizer to produce MMA types within the IR. + if (isMMAType(Ty1)) + return InstructionCost::getInvalid(); + if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy()) - return Cost; + return InstructionCost(1); std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1); // If type legalization involves splitting the vector, we don't want to // double the cost at every step - only the last step. if (LT1.first != 1 || !LT1.second.isVector()) - return Cost; + return InstructionCost(1); int ISD = TLI->InstructionOpcodeToISD(Opcode); if (TLI->isOperationExpand(ISD, LT1.second)) - return Cost; + return InstructionCost(1); if (Ty2) { std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2); if (LT2.first != 1 || !LT2.second.isVector()) - return Cost; + return InstructionCost(1); } - return Cost * 2; + return InstructionCost(2); } InstructionCost PPCTTIImpl::getArithmeticInstrCost( @@ -977,6 +995,11 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost( TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + + InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -986,12 +1009,18 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost( // Fallback to the default implementation. InstructionCost Cost = BaseT::getArithmeticInstrCost( Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); - return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); + return Cost * CostFactor; } InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, ArrayRef<int> Mask, int Index, Type *SubTp) { + + InstructionCost CostFactor = + vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + // Legalize the type. std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); @@ -1000,8 +1029,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, // instruction). We need one such shuffle instruction for each actual // register (this is not true for arbitrary shuffles, but is true for the // structured types of shuffles covered by TTI::ShuffleKind). - return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp, - nullptr); + return LT.first * CostFactor; } InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode, @@ -1020,9 +1048,13 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + InstructionCost Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); - Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src); + Cost *= CostFactor; // TODO: Allow non-throughput costs that aren't binary. if (CostKind != TTI::TCK_RecipThroughput) return Cost == 0 ? 
0 : 1; @@ -1034,12 +1066,17 @@ InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) { + InstructionCost CostFactor = + vectorCostAdjustmentFactor(Opcode, ValTy, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + InstructionCost Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return Cost; - return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); + return Cost * CostFactor; } InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, @@ -1049,8 +1086,12 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index); - Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr); + Cost *= CostFactor; if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) { // Double-precision scalars are already located in index #0 (or #1 if LE). @@ -1065,7 +1106,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (ISD == ISD::INSERT_VECTOR_ELT) // A move-to VSR and a permute/insert. Assume vector operation cost // for both (cost will be 2x on P9). - return vectorCostAdjustment(2, Opcode, Val, nullptr); + return 2 * CostFactor; // It's an extract. Maybe we can do a cheap move-from VSR. unsigned EltSize = Val->getScalarSizeInBits(); @@ -1082,7 +1123,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, // We need a vector extract (or mfvsrld). Assume vector operation cost. // The cost of the load constant for a vector extract is disregarded // (invariant, easily schedulable). - return vectorCostAdjustment(1, Opcode, Val, nullptr); + return CostFactor; } else if (ST->hasDirectMove()) // Assume permute has standard cost. 
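A sketch of the cost refactoring above, using plain C++ stand-ins for the LLVM cost types (FakeVecTy, looksLikeMMAType, costFactor and instructionCost are invented for this note): vectorCostAdjustment becomes a multiplicative factor rather than a pre-adjusted cost, an empty optional plays the role of InstructionCost::getInvalid() for the MMA types <256 x i1> and <512 x i1>, and each caller turns that into a maximum cost so the optimizer is discouraged from materializing MMA types in the IR.

#include <climits>
#include <optional>

struct FakeVecTy { unsigned ScalarBits; unsigned NumElts; };

static bool looksLikeMMAType(FakeVecTy Ty) {
  return Ty.ScalarBits == 1 && Ty.ScalarBits * Ty.NumElts > 128;  // v256i1 / v512i1
}

// 1 normally, 2 when vector and scalar units overlap, "invalid" for MMA types.
static std::optional<int> costFactor(FakeVecTy Ty, bool VectorsShareScalarUnits) {
  if (looksLikeMMAType(Ty))
    return std::nullopt;
  return VectorsShareScalarUnits ? 2 : 1;
}

static int instructionCost(int BaseCost, FakeVecTy Ty, bool SharedUnits) {
  auto Factor = costFactor(Ty, SharedUnits);
  if (!Factor)
    return INT_MAX;           // stand-in for InstructionCost::getMax()
  return BaseCost * *Factor;  // Cost * CostFactor, as in the patch
}

(The real helper also checks type legalization before doubling; this sketch keeps only the shape of the control flow.)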
@@ -1114,6 +1155,11 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) { + + InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + if (TLI->getValueType(DL, Src, true) == MVT::Other) return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); @@ -1128,7 +1174,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, if (CostKind != TTI::TCK_RecipThroughput) return Cost; - Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr); + Cost *= CostFactor; bool IsAltivecType = ST->hasAltivec() && (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 || @@ -1194,6 +1240,11 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { + InstructionCost CostFactor = + vectorCostAdjustmentFactor(Opcode, VecTy, nullptr); + if (!CostFactor.isValid()) + return InstructionCost::getMax(); + if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index aa84013803af..7aeb0c59d503 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -100,8 +100,8 @@ public: unsigned getCacheLineSize() const override; unsigned getPrefetchDistance() const override; unsigned getMaxInterleaveFactor(unsigned VF); - InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode, - Type *Ty1, Type *Ty2); + InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1, + Type *Ty2); InstructionCost getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index d1979b5456ce..f1c3810f4ee5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -170,6 +170,14 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); + // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx, + // or non-zero bits 8/9/10. + if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED || + RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) { + O << Imm; + return; + } + // Print the text form. 
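As a hedged restatement of the reserved-vtype test above (the field positions come from the patch's own comment plus the usual vsetvli immediate layout, and isReservedVType is not an LLVM function): vlmul occupies bits 2:0, vsew bits 5:3, and bits 10:8 must be zero for any encoding the printer can name symbolically; anything else falls back to printing the raw number.

static bool isReservedVType(unsigned Imm) {
  unsigned VLMul = Imm & 0x7;         // vlmul[2:0]; encoding 4 is reserved
  unsigned VSew  = (Imm >> 3) & 0x7;  // vsew[2:0];  0b1xx means SEW > 64
  return VLMul == 4 || VSew >= 4 || (Imm & 0x700) != 0;
}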
RISCVVType::printVType(Imm, O); } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 595c3cdfbb1d..f5d491938050 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -237,7 +237,13 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); + // If we do not reserve stack space for outgoing arguments in prologue, + // we will adjust the stack pointer before call instruction. After the + // adjustment, we can not use SP to access the stack objects for the + // arguments. Instead, use BP to access these stack objects. + return (MFI.hasVarSizedObjects() || + (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) && + TRI->hasStackRealignment(MF); } // Determines the size of the frame and maximum call frame size. @@ -1065,10 +1071,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - // Manually restore values not restored by libcall. Insert in reverse order. + // Manually restore values not restored by libcall. + // Keep the same order as in the prologue. There is no need to reverse the + // order in the epilogue. In addition, the return address will be restored + // first in the epilogue. It increases the opportunity to avoid the + // load-to-use data hazard between loading RA and return by RA. // loadRegFromStackSlot can insert multiple instructions. const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : reverse(NonLibcallCSI)) { + for (auto &CS : NonLibcallCSI) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0f1a6e5f9154..f3331571fc55 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -335,17 +335,29 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f16, Expand); setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Promote); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + 
setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + + // We need to custom promote this. + if (Subtarget.is64Bit()) + setOperationAction(ISD::FPOWI, MVT::i32, Custom); } if (Subtarget.hasStdExtF()) { @@ -676,6 +688,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); @@ -924,6 +940,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + for (auto CC : VFPCCToExpand) setCondCodeAction(CC, VT, Expand); @@ -1165,6 +1185,10 @@ bool RISCVTargetLowering::shouldSinkOperands( case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: return Operand == 1; case Instruction::Call: if (auto *II = dyn_cast<IntrinsicInst>(I)) { @@ -1631,6 +1655,66 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) { return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); } +// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain +// and back. Taking care to avoid converting values that are nan or already +// correct. +// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't +// have FRM dependencies modeled yet. +static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isVector() && "Unexpected type"); + + SDLoc DL(Op); + + // Freeze the source since we are increasing the number of uses. + SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0)); + + // Truncate to integer and convert back to FP. + MVT IntVT = VT.changeVectorElementTypeToInteger(); + SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src); + Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated); + + MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); + + if (Op.getOpcode() == ISD::FCEIL) { + // If the truncated value is the greater than or equal to the original + // value, we've computed the ceil. Otherwise, we went the wrong way and + // need to increase by 1. + // FIXME: This should use a masked operation. Handle here or in isel? + SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated, + DAG.getConstantFP(1.0, DL, VT)); + SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT); + Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); + } else if (Op.getOpcode() == ISD::FFLOOR) { + // If the truncated value is the less than or equal to the original value, + // we've computed the floor. Otherwise, we went the wrong way and need to + // decrease by 1. + // FIXME: This should use a masked operation. 
Handle here or in isel? + SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated, + DAG.getConstantFP(1.0, DL, VT)); + SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT); + Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); + } + + // Restore the original sign so that -0.0 is preserved. + Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src); + + // Determine the largest integer that can be represented exactly. This and + // values larger than it don't have any fractional bits so don't need to + // be converted. + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + unsigned Precision = APFloat::semanticsPrecision(FltSem); + APFloat MaxVal = APFloat(FltSem); + MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), + /*IsSigned*/ false, APFloat::rmNearestTiesToEven); + SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); + + // If abs(Src) was larger than MaxVal or nan, keep it. + SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src); + SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT); + return DAG.getSelect(DL, VT, Setcc, Truncated, Src); +} + static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); @@ -2670,6 +2754,20 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DAG.getConstant(3, DL, VT)); return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); } + case ISD::FPOWI: { + // Custom promote f16 powi with illegal i32 integer type on RV64. Once + // promoted this will be legalized into a libcall by LegalizeIntegerTypes. + if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && + Op.getOperand(1).getValueType() == MVT::i32) { + SDLoc DL(Op); + SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); + SDValue Powi = + DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); + return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, + DAG.getIntPtrConstant(0, DL)); + } + return SDValue(); + } case ISD::FP_EXTEND: { // RVV can only do fp_extend to types double the size as the source. 
We // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going @@ -2858,6 +2956,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: return lowerFP_TO_INT_SAT(Op, DAG); + case ISD::FTRUNC: + case ISD::FCEIL: + case ISD::FFLOOR: + return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: @@ -9834,6 +9936,23 @@ bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } +bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, + EVT VT) const { + if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) + return false; + + switch (FPVT.getSimpleVT().SimpleTy) { + case MVT::f16: + return Subtarget.hasStdExtZfh(); + case MVT::f32: + return Subtarget.hasStdExtF(); + case MVT::f64: + return Subtarget.hasStdExtD(); + default: + return false; + } +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 8e3d716ae919..849928eb46ae 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -514,6 +514,8 @@ public: bool isLegalElementTypeForRVV(Type *ScalarTy) const; + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; + private: /// RISCVCCAssignFn - This target-specific function extends the default /// CCValAssign with additional information used to lower RISC-V calling diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index b653928ccea9..6f9cde966132 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -705,6 +705,7 @@ def PseudoLD : PseudoLoad<"ld">; def PseudoSD : PseudoStore<"sd">; } // Predicates = [IsRV64] +def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>; def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>; def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 388cce00bdf3..798532d5bc44 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/ErrorHandling.h" #define GET_REGINFO_TARGET_DESC @@ -320,3 +321,30 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, return &RISCV::VRRegClass; return RC; } + +void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const { + // VLENB is the length of a vector register in bytes. We use <vscale x 8 x i8> + // to represent one vector register. The dwarf offset is + // VLENB * scalable_offset / 8. + assert(Offset.getScalable() % 8 == 0 && "Invalid frame offset"); + + // Add fixed-sized offset using existing DIExpression interface. 
+ DIExpression::appendOffset(Ops, Offset.getFixed()); + + unsigned VLENB = getDwarfRegNum(RISCV::VLENB, true); + int64_t VLENBSized = Offset.getScalable() / 8; + if (VLENBSized > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(VLENBSized); + Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_plus); + } else if (VLENBSized < 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(-VLENBSized); + Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_minus); + } +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 74a5b83ff6f3..2b2bbdfbdf32 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -63,6 +63,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const override; + + void getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const override; }; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index a915a572f3b7..a56f992d320e 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -480,6 +480,8 @@ let RegAltNameIndices = [ABIRegAltName] in { def VL : RISCVReg<0, "vl", ["vl"]>; def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>; def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>; + def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>, + DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; } foreach m = [1, 2, 4] in { diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 41599dd8bb3f..5a4c579dd708 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -388,4 +388,4 @@ def : SysReg<"vxrm", 0x00A>; def : SysReg<"vcsr", 0x00F>; def : SysReg<"vl", 0xC20>; def : SysReg<"vtype", 0xC21>; -def : SysReg<"vlenb", 0xC22>; +def SysRegVLENB: SysReg<"vlenb", 0xC22>; diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp index 7319924a24ba..259b37954183 100644 --- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp @@ -53,9 +53,8 @@ namespace { // instructions to fill delay slot. 
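Purely for illustration (scalableFrameOffset is this note's name, not LLVM's): the DWARF expression built by RISCVRegisterInfo::getOffsetOpcodes above evaluates, at debug time, to the same byte offset as the function below, where VLenB is the runtime value of the VLENB CSR, i.e. one vector register in bytes. The patch asserts that the scalable component is a multiple of 8, so dividing by 8 first is exact and yields a whole number of vector registers.

#include <cstdint>

int64_t scalableFrameOffset(int64_t FixedBytes, int64_t ScalableUnits, uint64_t VLenB) {
  // FixedBytes + (ScalableUnits / 8) * VLENB, matching "VLENB * scalable_offset / 8".
  return FixedBytes + (ScalableUnits / 8) * static_cast<int64_t>(VLenB);
}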
F.getRegInfo().invalidateLiveness(); - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - Changed |= runOnMachineBasicBlock(*FI); + for (MachineBasicBlock &MBB : F) + Changed |= runOnMachineBasicBlock(MBB); return Changed; } @@ -319,8 +318,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegDefs, SmallSet<unsigned, 32>& RegUses) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp index fa05a41f3127..bd26710fcbab 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.cpp +++ b/llvm/lib/Target/Sparc/LeonPasses.cpp @@ -42,8 +42,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) { DebugLoc DL = DebugLoc(); bool Modified = false; - for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) { - MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock &MBB : MF) { for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); @@ -77,10 +76,8 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget<SparcSubtarget>(); bool Modified = false; - for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) { - MachineBasicBlock &MBB = *MFI; - for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) { - MachineInstr &MI = *MBBI; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { unsigned Opcode = MI.getOpcode(); if (Opcode == SP::CALL && MI.getNumOperands() > 0) { MachineOperand &MO = MI.getOperand(0); @@ -129,8 +126,7 @@ bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) { DebugLoc DL = DebugLoc(); bool Modified = false; - for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) { - MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock &MBB : MF) { for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) { MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index d165052ca512..a740de9123c9 100644 --- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -343,19 +343,18 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { } // Rewrite MBB's Live-ins. 
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); - MBB != E; ++MBB) { + for (MachineBasicBlock &MBB : MF) { for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) { - if (!MBB->isLiveIn(reg)) + if (!MBB.isLiveIn(reg)) continue; - MBB->removeLiveIn(reg); - MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1); + MBB.removeLiveIn(reg); + MBB.addLiveIn(reg - SP::I0_I1 + SP::O0_O1); } for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { - if (!MBB->isLiveIn(reg)) + if (!MBB.isLiveIn(reg)) continue; - MBB->removeLiveIn(reg); - MBB->addLiveIn(reg - SP::I0 + SP::O0); + MBB.removeLiveIn(reg); + MBB.addLiveIn(reg - SP::I0 + SP::O0); } } diff --git a/llvm/lib/Target/Sparc/SparcMCInstLower.cpp b/llvm/lib/Target/Sparc/SparcMCInstLower.cpp index 8ea317fdd453..4e7e7bb5c81b 100644 --- a/llvm/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/llvm/lib/Target/Sparc/SparcMCInstLower.cpp @@ -97,8 +97,7 @@ void llvm::LowerSparcMachineInstrToMCInst(const MachineInstr *MI, OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp = LowerOperand(MI, MO, AP); if (MCOp.isValid()) diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp index ac94570e568f..631cbff303e8 100644 --- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -144,8 +144,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { if (MI.isDebugInstr()) return Ref; - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg()) { if (Register MOReg = MO.getReg()) { if (TRI->regsOverlap(MOReg, Reg)) { diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index d11d118fb8ee..2f7cdfcf7bde 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -270,8 +270,8 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters( // Make sure all call-saved GPRs are included as operands and are // marked as live on entry. - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) addSavedGPR(MBB, MIB, Reg, true); } @@ -283,16 +283,16 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters( } // Save FPRs/VRs in the normal TargetInstrInfo way. - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); - TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), &SystemZ::FP64BitRegClass, TRI); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); - TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), &SystemZ::VR128BitRegClass, TRI); } } @@ -313,13 +313,13 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Restore FPRs/VRs in the normal TargetInstrInfo way. 
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) - TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(), + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), &SystemZ::FP64BitRegClass, TRI); if (SystemZ::VR128BitRegClass.contains(Reg)) - TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(), + TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), &SystemZ::VR128BitRegClass, TRI); } @@ -345,8 +345,8 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( MIB.addImm(RestoreGPRs.GPROffset); // Do a second scan adding regs as being defined by instruction - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR && SystemZ::GR64BitRegClass.contains(Reg)) MIB.addReg(Reg, RegState::ImplicitDefine); @@ -965,24 +965,24 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( // Make sure all call-saved GPRs are included as operands and are // marked as live on entry. auto &GRRegClass = SystemZ::GR64BitRegClass; - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (GRRegClass.contains(Reg)) addSavedGPR(MBB, MIB, Reg, true); } } // Spill FPRs to the stack in the normal TargetInstrInfo way - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); - TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), &SystemZ::FP64BitRegClass, TRI); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); - TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), &SystemZ::VR128BitRegClass, TRI); } } diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 6fddb4f81c41..af219da79c32 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -29,7 +29,18 @@ public: create(const SystemZSubtarget &STI); // Override TargetFrameLowering. - bool isFPCloseToIncomingSP() const override { return false; } + bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const override { + // SystemZ wants normal register scavenging slots, as close to the stack or + // frame pointer as possible. + // The default implementation assumes an x86-like layout, where the frame + // pointer is at the opposite end of the frame from the stack pointer. + // This meant that when frame pointer elimination was disabled, + // the slots ended up being as close as possible to the incoming + // stack pointer, which is the opposite of what we want on SystemZ. + return false; + } + bool hasReservedCallFrame(const MachineFunction &MF) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, @@ -43,7 +54,6 @@ public: SystemZELFFrameLowering(); // Override TargetFrameLowering. 
- bool isFPCloseToIncomingSP() const override { return false; } bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2bf80882fa61..e80496e37781 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -203,8 +203,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef()); // Keep the remaining operands as-is. - for (unsigned I = 2; I < MI.getNumOperands(); ++I) - MIB.add(MI.getOperand(I)); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) + MIB.add(MO); MI.eraseFromParent(); } diff --git a/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp index ef39f80a94ef..d2932de5a6ea 100644 --- a/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -93,10 +93,8 @@ MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); + for (const MachineOperand &MO : MI->operands()) // Ignore all implicit register operands. if (!MO.isReg() || !MO.isImplicit()) OutMI.addOperand(lowerOperand(MO)); - } } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp index 1fe9423e01b8..1d8c3d514bfb 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp @@ -23,14 +23,6 @@ using namespace llvm; #define DEBUG_TYPE "ve-asmprinter" -// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target -// namespace. -namespace llvm { -namespace VE { -using namespace VE; -} -} // namespace llvm - #define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR #include "VEGenAsmWriter.inc" @@ -62,13 +54,10 @@ void VEInstPrinter::printOperand(const MCInst *MI, int OpNum, } if (MO.isImm()) { - switch (MI->getOpcode()) { - default: - // Expects signed 32bit literals - int32_t TruncatedImm = static_cast<int32_t>(MO.getImm()); - O << TruncatedImm; - return; - } + // Expects signed 32bit literals. 
+ int32_t TruncatedImm = static_cast<int32_t>(MO.getImm()); + O << TruncatedImm; + return; } assert(MO.isExpr() && "Unknown operand kind in printOperand"); diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp index ddcfb9da8249..46846edfeafb 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -942,11 +942,11 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm); MachineInstr *Inst = MIB.getInstr(); - MI.eraseFromParent(); if (KillSrc) { const TargetRegisterInfo *TRI = &getRegisterInfo(); Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true); } + MI.eraseFromParent(); return true; } case VE::VFMKyal: @@ -956,6 +956,7 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case VE::VFMKSyvl: case VE::VFMKSyvyl: expandPseudoVFMK(*this, MI); + return true; } return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 80abccd74782..7b70d99b5f52 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -368,8 +368,8 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { return nullptr; // No reg alloc } -static void checkSanityForEHAndSjLj(const TargetMachine *TM) { - // Sanity checking related to -exception-model +static void basicCheckForEHAndSjLj(const TargetMachine *TM) { + // Basic Correctness checking related to -exception-model if (TM->Options.ExceptionModel != ExceptionHandling::None && TM->Options.ExceptionModel != ExceptionHandling::Wasm) report_fatal_error("-exception-model should be either 'none' or 'wasm'"); @@ -431,7 +431,7 @@ void WebAssemblyPassConfig::addIRPasses() { if (getOptLevel() != CodeGenOpt::None) addPass(createWebAssemblyOptimizeReturned()); - checkSanityForEHAndSjLj(TM); + basicCheckForEHAndSjLj(TM); // If exception handling is not enabled and setjmp/longjmp handling is // enabled, we lower invokes into calls and delete unreachable landingpad diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index d4f39b571394..3df48b466d07 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -148,7 +148,7 @@ public: AlignBranchType.addKind(X86::AlignBranchJcc); AlignBranchType.addKind(X86::AlignBranchJmp); } - // Allow overriding defaults set by master flag + // Allow overriding defaults set by main flag if (X86AlignBranchBoundary.getNumOccurrences()) AlignBoundary = assumeAligned(X86AlignBranchBoundary); if (X86AlignBranch.getNumOccurrences()) @@ -1452,9 +1452,7 @@ public: unsigned NumDefCFAOffsets = 0; int MinAbsOffset = std::numeric_limits<int>::max(); - for (unsigned i = 0, e = Instrs.size(); i != e; ++i) { - const MCCFIInstruction &Inst = Instrs[i]; - + for (const MCCFIInstruction &Inst : Instrs) { switch (Inst.getOperation()) { default: // Any other CFI directives indicate a frame that we aren't prepared diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index a2ae6345c006..9826bf4bf861 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -186,8 +186,8 @@ public: TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), 
*MBB->getParent())); MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); - for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; ++Idx) - Bld.add(MI->getOperand(Idx)); + for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) + Bld.add(MO); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) .add(MI->getOperand(0)) diff --git a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp index df8df1e3a65d..c8ceebb8b8e6 100644 --- a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp +++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp @@ -212,6 +212,12 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64; assert(SlotSize == 4 || SlotSize == 8); + Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None; + if (unsigned Num = MI->peekDebugInstrNum()) { + // Operand 2 of DYN_ALLOCAs contains the stack def. + InstrNum = {Num, 2}; + } + switch (L) { case TouchAndSub: { assert(Amount >= SlotSize); @@ -251,7 +257,7 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) { // Do the probe. STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL, - /*InProlog=*/false); + /*InProlog=*/false, InstrNum); } else { // Sub BuildMI(*MBB, I, DL, diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 01dc509df795..93bc23006dc4 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -209,10 +209,8 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB, llvm_unreachable("unexpected opcode"); OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); - unsigned OpStart = 1; bool RAXImplicitDead = false; - for (; OpStart < MI.getNumOperands(); ++OpStart) { - MachineOperand &Op = MI.getOperand(OpStart); + for (MachineOperand &Op : llvm::drop_begin(MI.operands())) { // RAX may be 'implicit dead', if there are no other users of the return // value. We introduce a new use, so change it to 'implicit def'. if (Op.isReg() && Op.isImplicit() && Op.isDead() && diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp index 9a63cffe0a09..4730b936ec1f 100644 --- a/llvm/lib/Target/X86/X86FixupLEAs.cpp +++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -278,10 +278,9 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { RegUsageState RegUsage = RU_NotUsed; MachineInstr &MI = *I; - for (unsigned i = 0; i < MI.getNumOperands(); ++i) { - MachineOperand &opnd = MI.getOperand(i); - if (opnd.isReg() && opnd.getReg() == p.getReg()) { - if (opnd.isDef()) + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() == p.getReg()) { + if (MO.isDef()) return RU_Write; RegUsage = RU_Read; } diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 60e1b37ed61c..4d9160f35226 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -446,11 +446,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Get dead variables list now because the MI pointer may be deleted as part // of processing! 
SmallVector<unsigned, 8> DeadRegs; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isDead()) DeadRegs.push_back(MO.getReg()); - } switch (FPInstClass) { case X86II::ZeroArgFP: handleZeroArgFP(I); break; @@ -1672,8 +1670,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { // Collect all FP registers (register operands with constraints "t", "u", // and "f") to kill afer the instruction. unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &Op = MI.getOperand(i); + for (const MachineOperand &Op : MI.operands()) { if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) continue; unsigned FPReg = getFPReg(Op); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index bd780273509f..c29ae9f6af4c 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -465,13 +465,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - if (CSI.empty()) return; // Calculate offsets. - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); + for (const CalleeSavedInfo &I : CSI) { + int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); + unsigned Reg = I.getReg(); unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); if (IsPrologue) { @@ -484,10 +482,10 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( } } -void X86FrameLowering::emitStackProbe(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, bool InProlog) const { +void X86FrameLowering::emitStackProbe( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, + Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); if (STI.isTargetWindowsCoreCLR()) { if (InProlog) { @@ -497,10 +495,14 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF, emitStackProbeInline(MF, MBB, MBBI, DL, false); } } else { - emitStackProbeCall(MF, MBB, MBBI, DL, InProlog); + emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum); } } +bool X86FrameLowering::stackProbeFunctionModifiesSP() const { + return STI.isOSWindows() && !STI.isTargetWin64(); +} + void X86FrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) { @@ -971,11 +973,10 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( } } -void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, - bool InProlog) const { +void X86FrameLowering::emitStackProbeCall( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, + Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; // FIXME: Add indirect thunk support and remove this. 
@@ -1015,6 +1016,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, .addReg(SP, RegState::Define | RegState::Implicit) .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + MachineInstr *ModInst = CI; if (STI.isTargetWin64() || !STI.isOSWindows()) { // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp @@ -1022,9 +1024,27 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, // adjusting %rsp. // All other platforms do not specify a particular ABI for the stack probe // function, so we arbitrarily define it to not adjust %esp/%rsp itself. - BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP) - .addReg(SP) - .addReg(AX); + ModInst = + BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP) + .addReg(SP) + .addReg(AX); + } + + // DebugInfo variable locations -- if there's an instruction number for the + // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that + // modifies SP. + if (InstrNum) { + if (STI.isTargetWin64() || !STI.isOSWindows()) { + // Label destination operand of the subtract. + MF.makeDebugValueSubstitution(*InstrNum, + {ModInst->getDebugInstrNum(), 0}); + } else { + // Label the call. The operand number is the penultimate operand, zero + // based. + unsigned SPDefOperand = ModInst->getNumOperands() - 2; + MF.makeDebugValueSubstitution( + *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand}); + } } if (InProlog) { @@ -2652,8 +2672,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters( DebugLoc DL = MBB.findDebugLoc(MI); // Reload XMMs from stack frame. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; @@ -2664,13 +2684,13 @@ bool X86FrameLowering::restoreCalleeSavedRegisters( VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI); } // POP GPRs. unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 6309b8a066c4..e18be0d26321 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H #define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Support/TypeSize.h" @@ -51,9 +52,14 @@ public: /// Emit target stack probe code. This is required for all /// large stack allocations on Windows. The caller is required to materialize /// the number of bytes to probe in RAX/EAX. - void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - bool InProlog) const; + /// \p InstrNum optionally contains a debug-info instruction number for the + /// new stack pointer. 
+ void emitStackProbe( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, + Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None) const; + + bool stackProbeFunctionModifiesSP() const override; /// Replace a StackProbe inline-stub with the actual probe code inline. void inlineStackProbe(MachineFunction &MF, @@ -198,9 +204,10 @@ private: uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; /// Emit target stack probe as a call to a helper function - void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, const DebugLoc &DL, - bool InProlog) const; + void emitStackProbeCall( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, + Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const; /// Emit target stack probe as an inline sequence. void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17d14053d804..62b2387396be 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23190,6 +23190,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + // We don't need to replace SQRT with RSQRT for half type. + if (VT.getScalarType() == MVT::f16) + return true; + // We never want to use both SQRT and RSQRT instructions for the same input. if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) return false; @@ -23228,11 +23232,15 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, UseOneConstNR = false; // There is no FSQRT for 512-bits, but there is RSQRT14. unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT; - return DAG.getNode(Opcode, DL, VT, Op); + SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op); + if (RefinementSteps == 0 && !Reciprocal) + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate); + return Estimate; } if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) && Subtarget.hasFP16()) { + assert(Reciprocal && "Don't replace SQRT with RSQRT for half type"); if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = 0; @@ -45680,7 +45688,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, if (is64BitFP && !Subtarget.is64Bit()) { // On a 32-bit target, we cannot bitcast the 64-bit float to a // 64-bit integer, since that's not a legal type. Since - // OnesOrZeroesF is all ones of all zeroes, we don't need all the + // OnesOrZeroesF is all ones or all zeroes, we don't need all the // bits, but can do this little dance to extract the lowest 32 bits // and work with those going forward. 
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, @@ -46577,6 +46585,59 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, return Ret; } +static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, + SDValue And1_L, SDValue And1_R, SDLoc DL, + SelectionDAG &DAG) { + if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse()) + return SDValue(); + SDValue NotOp = And0_L->getOperand(0); + if (NotOp == And1_R) + std::swap(And1_R, And1_L); + if (NotOp != And1_L) + return SDValue(); + + // (~(NotOp) & And0_R) | (NotOp & And1_R) + // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R + EVT VT = And1_L->getValueType(0); + SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R); + SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R); + SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp); + SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R); + return Xor1; +} + +/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the +/// equivalent `((x ^ y) & m) ^ y)` pattern. +/// This is typically a better representation for targets without a fused +/// "and-not" operation. This function is intended to be called from a +/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes. +static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) { + // Note that masked-merge variants using XOR or ADD expressions are + // normalized to OR by InstCombine so we only check for OR. + assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node"); + SDValue N0 = Node->getOperand(0); + if (N0->getOpcode() != ISD::AND || !N0->hasOneUse()) + return SDValue(); + SDValue N1 = Node->getOperand(1); + if (N1->getOpcode() != ISD::AND || !N1->hasOneUse()) + return SDValue(); + + SDLoc DL(Node); + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG)) + return Result; + if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG)) + return Result; + if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG)) + return Result; + if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG)) + return Result; + return SDValue(); +} + static SDValue combineOr(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -46670,6 +46731,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, return Res; } + // We should fold "masked merge" patterns when `andn` is not available. + if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1) + if (SDValue R = foldMaskedMerge(N, DAG)) + return R; + return SDValue(); } @@ -48504,20 +48570,50 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, SDValue LHS = Src.getOperand(0).getOperand(0); SDValue RHS = Src.getOperand(0).getOperand(1); - unsigned ExtOpc = LHS.getOpcode(); - if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) || - RHS.getOpcode() != ExtOpc) - return SDValue(); - - // Peek through the extends. - LHS = LHS.getOperand(0); - RHS = RHS.getOperand(0); - - // Ensure the input types match. - if (LHS.getValueType() != VT || RHS.getValueType() != VT) - return SDValue(); + // Count leading sign/zero bits on both inputs - if there are enough then + // truncation back to vXi16 will be cheap - either as a pack/shuffle + // sequence or using AVX512 truncations. 
If the inputs are sext/zext then the + // truncations may actually be free by peeking through to the ext source. + auto IsSext = [&DAG](SDValue V) { + return DAG.ComputeMinSignedBits(V) <= 16; + }; + auto IsZext = [&DAG](SDValue V) { + return DAG.computeKnownBits(V).countMaxActiveBits() <= 16; + }; - unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; + bool IsSigned = IsSext(LHS) && IsSext(RHS); + bool IsUnsigned = IsZext(LHS) && IsZext(RHS); + if (!IsSigned && !IsUnsigned) + return SDValue(); + + // Check if both inputs are extensions, which will be removed by truncation. + bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND || + LHS.getOpcode() == ISD::ZERO_EXTEND) && + (RHS.getOpcode() == ISD::SIGN_EXTEND || + RHS.getOpcode() == ISD::ZERO_EXTEND) && + LHS.getOperand(0).getScalarValueSizeInBits() <= 16 && + RHS.getOperand(0).getScalarValueSizeInBits() <= 16; + + // For AVX2+ targets, with the upper bits known zero, we can perform MULHU on + // the (bitcasted) inputs directly, and then cheaply pack/truncate the result + // (upper elts will be zero). Don't attempt this with just AVX512F as MULHU + // will have to split anyway. + unsigned InSizeInBits = InVT.getSizeInBits(); + if (IsUnsigned && !IsTruncateFree && Subtarget.hasInt256() && + !(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) && + (InSizeInBits % 16) == 0) { + EVT BCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, + InVT.getSizeInBits() / 16); + SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS), + DAG.getBitcast(BCVT, RHS)); + return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res)); + } + + // Truncate back to source type. + LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS); + RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS); + + unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU; return DAG.getNode(Opc, DL, VT, LHS, RHS); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 8aee96e1c504..1db83033ba35 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12937,8 +12937,8 @@ def : Pat<(v16i32 (X86vzmovl (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; -def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))), - (VMOVW2SHrr GR32:$src)>; +def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))), + (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; // AVX 128-bit movw instruction write zeros in the high 128-bit part. def : Pat<(v8i16 (X86vzload16 addr:$src)), diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 639aa5199ea5..bb5637a31947 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1163,8 +1163,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead. 
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS && !MO.isDead()) { return true; @@ -5676,10 +5675,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI.getOperand(i + 2); MIB.add(MO); } - for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2)) MIB.add(MO); - } updateOperandRegConstraints(MF, *NewMI, TII); diff --git a/llvm/lib/Target/X86/X86RegisterBanks.td b/llvm/lib/Target/X86/X86RegisterBanks.td index 74c515850ab1..91a497252595 100644 --- a/llvm/lib/Target/X86/X86RegisterBanks.td +++ b/llvm/lib/Target/X86/X86RegisterBanks.td @@ -1,4 +1,4 @@ -//=- X86RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=// +//=- X86RegisterBank.td - Describe the X86 Banks -------------*- tablegen -*-=// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 06dacb638d16..869762b35196 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1584,54 +1584,98 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, // Mask sign extend has an instruction. - { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 }, // Mask zero extend is a sext + shift. 
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 }, + + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 }, { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // vpmovwb - { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // vpmovwb - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // vpmovwb - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm - { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, - { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 }, - { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, }; static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { + // Mask sign extend has an instruction. 
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 }, + + // Mask zero extend is a sext + shift. + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, + + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v8i64, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, @@ -1786,40 +1830,94 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] { // Mask sign extend has an instruction. - { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, - { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v32i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v64i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v64i1, 1 }, // Mask zero extend is a sext + shift. 
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, - { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v32i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v64i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v64i1, 2 }, + + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, + { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, + { ISD::TRUNCATE, MVT::v32i1, MVT::v16i16, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v32i8, 2 }, + { ISD::TRUNCATE, MVT::v64i1, MVT::v16i16, 2 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, - { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb - { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw - { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb - { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb - { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb - { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw - { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb }; static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = { + // Mask sign extend has an instruction. + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i1, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, + + // Mask zero extend is a sext + shift. 
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i1, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, + + { ISD::TRUNCATE, MVT::v16i1, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v16i1, MVT::v8i32, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, + { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, + { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v4i64, 2 }, + { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, @@ -3674,6 +3772,10 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, PromEltTyBits = 16; // promote to i16, AVX512BW. break; } + if (ST->hasDQI()) { + PromEltTyBits = 32; // promote to i32, AVX512F. + break; + } return bailout(); default: return bailout(); @@ -3969,7 +4071,9 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty, // Even in the case of (loop invariant) stride whose value is not known at // compile time, the address computation will not incur more than one extra // ADD instruction. - if (Ty->isVectorTy() && SE) { + if (Ty->isVectorTy() && SE && !ST->hasAVX2()) { + // TODO: AVX2 is the current cut-off because we don't have correct + // interleaving costs for prior ISA's. if (!BaseT::isStridedAccess(Ptr)) return NumVectorInstToHideOverhead; if (!BaseT::getConstantStrideStep(SE, Ptr)) @@ -5173,7 +5277,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), LegalVT.getVectorNumElements()); InstructionCost MemOpCost; - if (UseMaskForCond || UseMaskForGaps) + bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps; + if (UseMaskedMemOp) MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace, CostKind); else @@ -5183,9 +5288,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( unsigned VF = VecTy->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); - // FIXME: this is the most conservative estimate for the mask cost. InstructionCost MaskCost; - if (UseMaskForCond || UseMaskForGaps) { + if (UseMaskedMemOp) { APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements()); for (unsigned Index : Indices) { assert(Index < Factor && "Invalid index for interleaved memory op"); @@ -5193,10 +5297,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( DemandedLoadStoreElts.setBit(Index + Elm * Factor); } - Type *I8Type = Type::getInt8Ty(VecTy->getContext()); + Type *I1Type = Type::getInt1Ty(VecTy->getContext()); MaskCost = getReplicationShuffleCost( - I8Type, Factor, VF, + I1Type, Factor, VF, UseMaskForGaps ? DemandedLoadStoreElts : APInt::getAllOnes(VecTy->getNumElements()), CostKind); @@ -5207,7 +5311,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( // memory access, we need to account for the cost of And-ing the two masks // inside the loop. 
if (UseMaskForGaps) { - auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements()); + auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements()); MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); } } @@ -5248,9 +5352,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( NumOfLoadsInInterleaveGrp; // About a half of the loads may be folded in shuffles when we have only - // one result. If we have more than one result, we do not fold loads at all. + // one result. If we have more than one result, or the loads are masked, + // we do not fold loads at all. unsigned NumOfUnfoldedLoads = - NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2; + UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2; // Get a number of shuffle operations per result. unsigned NumOfShufflesPerResult = diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index 27ac6a4d1439..f2f89f4269ed 100644 --- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -427,19 +427,19 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters( if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - for (auto it = CSI.begin(); it != CSI.end(); ++it) { - unsigned Reg = it->getReg(); + for (const CalleeSavedInfo &I : CSI) { + unsigned Reg = I.getReg(); assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) && "LR & FP are always handled in emitPrologue"); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, it->getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI); if (emitFrameMoves) { auto Store = MI; --Store; - XFI->getSpillLabels().push_back(std::make_pair(Store, *it)); + XFI->getSpillLabels().push_back(std::make_pair(Store, I)); } } return true; diff --git a/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp index b5dbdea98eea..71836133fae6 100644 --- a/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp +++ b/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp @@ -48,9 +48,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) { const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo()); unsigned StackSize = MF.getFrameInfo().getStackSize(); - for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock::iterator MBBI = MBB.begin(), EE = MBB.end(); MBBI != EE; ++MBBI) { if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) { diff --git a/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/llvm/lib/Target/XCore/XCoreMCInstLower.cpp index cd28fa5cd144..6f5dcb291e6e 100644 --- a/llvm/lib/Target/XCore/XCoreMCInstLower.cpp +++ b/llvm/lib/Target/XCore/XCoreMCInstLower.cpp @@ -103,8 +103,7 @@ MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO, void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp = LowerOperand(MO); if (MCOp.isValid()) diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp 
b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b2c2efed7db8..ba7589c2bf60 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -275,94 +276,64 @@ CleanupPointerRootUsers(GlobalVariable *GV, /// We just marked GV constant. Loop over all users of the global, cleaning up /// the obvious ones. This is largely just a quick scan over the use list to /// clean up the easy and obvious cruft. This returns true if it made a change. -static bool CleanupConstantGlobalUsers( - Value *V, Constant *Init, const DataLayout &DL, - function_ref<TargetLibraryInfo &(Function &)> GetTLI) { +static bool CleanupConstantGlobalUsers(GlobalVariable *GV, + const DataLayout &DL) { + Constant *Init = GV->getInitializer(); + SmallVector<User *, 8> WorkList(GV->users()); + SmallPtrSet<User *, 8> Visited; bool Changed = false; - // Note that we need to use a weak value handle for the worklist items. When - // we delete a constant array, we may also be holding pointer to one of its - // elements (or an element of one of its elements if we're dealing with an - // array of arrays) in the worklist. - SmallVector<WeakTrackingVH, 8> WorkList(V->users()); + + SmallVector<WeakTrackingVH> MaybeDeadInsts; + auto EraseFromParent = [&](Instruction *I) { + for (Value *Op : I->operands()) + if (auto *OpI = dyn_cast<Instruction>(Op)) + MaybeDeadInsts.push_back(OpI); + I->eraseFromParent(); + Changed = true; + }; while (!WorkList.empty()) { - Value *UV = WorkList.pop_back_val(); - if (!UV) + User *U = WorkList.pop_back_val(); + if (!Visited.insert(U).second) continue; - User *U = cast<User>(UV); + if (auto *BO = dyn_cast<BitCastOperator>(U)) + append_range(WorkList, BO->users()); + if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U)) + append_range(WorkList, ASC->users()); + else if (auto *GEP = dyn_cast<GEPOperator>(U)) + append_range(WorkList, GEP->users()); + else if (auto *LI = dyn_cast<LoadInst>(U)) { + // A load from zeroinitializer is always zeroinitializer, regardless of + // any applied offset. + if (Init->isNullValue()) { + LI->replaceAllUsesWith(Constant::getNullValue(LI->getType())); + EraseFromParent(LI); + continue; + } - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (Init) { - if (auto *Casted = - ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) { - // Replace the load with the initializer. - LI->replaceAllUsesWith(Casted); - LI->eraseFromParent(); - Changed = true; + Value *PtrOp = LI->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0); + PtrOp = PtrOp->stripAndAccumulateConstantOffsets( + DL, Offset, /* AllowNonInbounds */ true); + if (PtrOp == GV) { + if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(), + Offset, DL)) { + LI->replaceAllUsesWith(Value); + EraseFromParent(LI); } } } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // Store must be unreachable or storing Init into the global. 
- SI->eraseFromParent(); - Changed = true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - Constant *SubInit = nullptr; - if (Init) - SubInit = ConstantFoldLoadThroughGEPConstantExpr( - Init, CE, V->getType()->getPointerElementType(), DL); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI); - } else if ((CE->getOpcode() == Instruction::BitCast && - CE->getType()->isPointerTy()) || - CE->getOpcode() == Instruction::AddrSpaceCast) { - // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI); - } - - if (CE->use_empty()) { - CE->destroyConstant(); - Changed = true; - } - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // Do not transform "gepinst (gep constexpr (GV))" here, because forming - // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold - // and will invalidate our notion of what Init is. - Constant *SubInit = nullptr; - if (!isa<ConstantExpr>(GEP->getOperand(0))) { - ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( - ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction()))); - if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) - SubInit = ConstantFoldLoadThroughGEPConstantExpr( - Init, CE, V->getType()->getPointerElementType(), DL); - - // If the initializer is an all-null value and we have an inbounds GEP, - // we already know what the result of any load from that GEP is. - // TODO: Handle splats. - if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds()) - SubInit = Constant::getNullValue(GEP->getResultElementType()); - } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI); - - if (GEP->use_empty()) { - GEP->eraseFromParent(); - Changed = true; - } + EraseFromParent(SI); } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv - if (MI->getRawDest() == V) { - MI->eraseFromParent(); - Changed = true; - } - - } else if (Constant *C = dyn_cast<Constant>(U)) { - // If we have a chain of dead constantexprs or other things dangling from - // us, and if they are all dead, nuke them without remorse. - if (isSafeToDestroyConstant(C)) { - C->destroyConstant(); - CleanupConstantGlobalUsers(V, Init, DL, GetTLI); - return true; - } + if (getUnderlyingObject(MI->getRawDest()) == GV) + EraseFromParent(MI); } } + + Changed |= + RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts); + GV->removeDeadConstantUsers(); return Changed; } @@ -889,7 +860,7 @@ static bool OptimizeAwayTrappingUsesOfLoads( Changed |= CleanupPointerRootUsers(GV, GetTLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI); + CleanupConstantGlobalUsers(GV, DL); } if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -1557,8 +1528,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - Changed = - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); + Changed = CleanupConstantGlobalUsers(GV, DL); } // If the global is dead now, delete it. @@ -1583,7 +1553,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, } // Clean up any obviously simplifiable users now. - Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); + Changed |= CleanupConstantGlobalUsers(GV, DL); // If the global is dead now, just nuke it. 
if (GV->use_empty()) { @@ -1628,7 +1598,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI); + CleanupConstantGlobalUsers(GV, DL); if (GV->use_empty()) { LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index f342c35fa283..055ee6b50296 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1885,6 +1885,7 @@ private: OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); + ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size); registerAAs(IsModulePass); @@ -3727,12 +3728,37 @@ struct AAKernelInfoFunction : AAKernelInfo { CheckRWInst, *this, UsedAssumedInformationInCheckRWInst)) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + bool UsedAssumedInformationFromReachingKernels = false; if (!IsKernelEntry) { - updateReachingKernelEntries(A); updateParallelLevels(A); + bool AllReachingKernelsKnown = true; + updateReachingKernelEntries(A, AllReachingKernelsKnown); + UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown; + if (!ParallelLevels.isValidState()) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else if (!ReachingKernelEntries.isValidState()) + SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + else if (!SPMDCompatibilityTracker.empty()) { + // Check if all reaching kernels agree on the mode as we can otherwise + // not guard instructions. We might not be sure about the mode so we + // we cannot fix the internal spmd-zation state either. + int SPMD = 0, Generic = 0; + for (auto *Kernel : ReachingKernelEntries) { + auto &CBAA = A.getAAFor<AAKernelInfo>( + *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL); + if (CBAA.SPMDCompatibilityTracker.isValidState() && + CBAA.SPMDCompatibilityTracker.isAssumed()) + ++SPMD; + else + ++Generic; + if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint()) + UsedAssumedInformationFromReachingKernels = true; + } + if (SPMD != 0 && Generic != 0) + SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + } } // Callback to check a call instruction. @@ -3779,7 +3805,8 @@ struct AAKernelInfoFunction : AAKernelInfo { // If we haven't used any assumed information for the SPMD state we can fix // it. if (!UsedAssumedInformationInCheckRWInst && - !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed) + !UsedAssumedInformationInCheckCallInst && + !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); return StateBefore == getState() ? ChangeStatus::UNCHANGED @@ -3788,7 +3815,8 @@ struct AAKernelInfoFunction : AAKernelInfo { private: /// Update info regarding reaching kernels. 
- void updateReachingKernelEntries(Attributor &A) { + void updateReachingKernelEntries(Attributor &A, + bool &AllReachingKernelsKnown) { auto PredCallSite = [&](AbstractCallSite ACS) { Function *Caller = ACS.getInstruction()->getFunction(); @@ -3808,10 +3836,9 @@ private: return true; }; - bool AllCallSitesKnown; if (!A.checkForAllCallSites(PredCallSite, *this, true /* RequireAllCallSites */, - AllCallSitesKnown)) + AllReachingKernelsKnown)) ReachingKernelEntries.indicatePessimisticFixpoint(); } diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 7402e399a88a..2d717475ce7f 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -641,8 +641,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const { if (!CandidateFound) return std::unique_ptr<FunctionOutliningInfo>(); - // Do sanity check of the entries: threre should not - // be any successors (not in the entry set) other than + // There should not be any successors (not in the entry set) other than // {ReturnBlock, NonReturnBlock} assert(OutliningInfo->Entries[0] == &F.front() && "Function Entry must be the first in Entries vector"); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index a961c47a7501..b8fac9d47763 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -84,6 +84,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" #include <algorithm> @@ -173,6 +174,9 @@ static cl::opt<bool> cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on.")); +cl::opt<bool> + SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden, + cl::desc("Sort profiled recursion by edge weights.")); static cl::opt<bool> ProfileSizeInline( "sample-profile-inline-size", cl::Hidden, cl::init(false), @@ -1648,6 +1652,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { SmallVector<uint32_t, 4> Weights; uint32_t MaxWeight = 0; Instruction *MaxDestInst; + // Since profi treats multiple edges (multiway branches) as a single edge, + // we need to distribute the computed weight among the branches. We do + // this by evenly splitting the edge weight among destinations. + DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity; + std::vector<uint64_t> EdgeIndex; + if (SampleProfileUseProfi) { + EdgeIndex.resize(TI->getNumSuccessors()); + for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { + const BasicBlock *Succ = TI->getSuccessor(I); + EdgeIndex[I] = EdgeMultiplicity[Succ]; + EdgeMultiplicity[Succ]++; + } + } for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { BasicBlock *Succ = TI->getSuccessor(I); Edge E = std::make_pair(BB, Succ); @@ -1660,9 +1677,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) { LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)"); Weight = std::numeric_limits<uint32_t>::max(); } - // Weight is added by one to avoid propagation errors introduced by - // 0 weights. 
- Weights.push_back(static_cast<uint32_t>(Weight + 1)); + if (!SampleProfileUseProfi) { + // Weight is added by one to avoid propagation errors introduced by + // 0 weights. + Weights.push_back(static_cast<uint32_t>(Weight + 1)); + } else { + // Profi creates proper weights that do not require "+1" adjustments but + // we evenly split the weight among branches with the same destination. + uint64_t W = Weight / EdgeMultiplicity[Succ]; + // Rounding up, if needed, so that first branches are hotter. + if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ]) + W++; + Weights.push_back(static_cast<uint32_t>(W)); + } if (Weight != 0) { if (Weight > MaxWeight) { MaxWeight = Weight; @@ -1853,7 +1880,13 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG); scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get()); while (!CGI.isAtEnd()) { - for (ProfiledCallGraphNode *Node : *CGI) { + auto Range = *CGI; + if (SortProfiledSCC) { + // Sort nodes in one SCC based on callsite hotness. + scc_member_iterator<ProfiledCallGraph *> SI(*CGI); + Range = *SI; + } + for (auto *Node : Range) { Function *F = SymbolMap.lookup(Node->Name); if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) FunctionOrderList.push_back(F); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 06c9bf650f37..dc55b5a31596 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1727,16 +1727,18 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I, (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - Value *A, *B, *C; + Value *A, *B, *C, *X, *Y; // (~(A | B) & C) | ... --> ... // (~(A & B) | C) & ... --> ... // TODO: One use checks are conservative. We just need to check that a total // number of multiple used values does not exceed reduction // in operations. - if (match(Op0, m_c_BinOp(FlippedOpcode, - m_Not(m_BinOp(Opcode, m_Value(A), m_Value(B))), - m_Value(C)))) { + if (match(Op0, + m_c_BinOp(FlippedOpcode, + m_CombineAnd(m_Value(X), m_Not(m_BinOp(Opcode, m_Value(A), + m_Value(B)))), + m_Value(C)))) { // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A) if (match(Op1, @@ -1776,6 +1778,21 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I, m_c_BinOp(Opcode, m_Specific(B), m_Specific(C))))))) return BinaryOperator::CreateNot(Builder.CreateBinOp( Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B)); + + // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B))) + // Note, the pattern with swapped and/or is not handled because the + // result is more undefined than a source: + // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid. 
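The InstCombineAndOrXor.cpp hunk above adds a fold based on the bitwise identity (~(A | B) & C) | ~(C | (A ^ B)) == ~((A | B) & (C | (A ^ B))); the note about the swapped and/or form refers to poison/undef propagation ("more undefined than a source"), not to the concrete-bit identity. A small exhaustive single-bit check of the added fold, which also covers the (~A | B) ^ A --> ~(A & B) fold added later in visitXor (standalone sketch, not part of the commit; per-bit behaviour of the bitwise operators makes the single-bit check sufficient):

#include <cassert>

int main() {
  auto NOT = [](unsigned V) { return ~V & 1u; };
  for (unsigned A = 0; A <= 1; ++A)
    for (unsigned B = 0; B <= 1; ++B)
      for (unsigned C = 0; C <= 1; ++C) {
        // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
        unsigned LHS = (NOT(A | B) & C) | NOT(C | (A ^ B));
        unsigned RHS = NOT((A | B) & (C | (A ^ B)));
        assert(LHS == RHS);
        // (~A | B) ^ A --> ~(A & B)   (from the visitXor hunk further down)
        assert(((NOT(A) | B) ^ A) == NOT(A & B));
      }
  return 0;
}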
+ if (Opcode == Instruction::Or && Op0->hasOneUse() && + match(Op1, m_OneUse(m_Not(m_CombineAnd( + m_Value(Y), + m_c_BinOp(Opcode, m_Specific(C), + m_c_Xor(m_Specific(A), m_Specific(B)))))))) { + // X = ~(A | B) + // Y = (C | (A ^ B) + Value *Or = cast<BinaryOperator>(X)->getOperand(0); + return BinaryOperator::CreateNot(Builder.CreateAnd(Or, Y)); + } } return nullptr; @@ -2061,7 +2078,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) return CastedAnd; + if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) + return Sel; + // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>. + // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold + // with binop identity constant. But creating a select with non-constant + // arm may not be reversible due to poison semantics. Is that a good + // canonicalization? Value *A; if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && A->getType()->isIntOrIntVectorTy(1)) @@ -2322,11 +2346,20 @@ Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) { Value *Cond; Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && - Cond->getType()->isIntOrIntVectorTy(1) && - match(B, m_OneUse(m_Not(m_Value(NotB))))) { - NotB = peekThroughBitcast(NotB, true); - if (match(NotB, m_SExt(m_Specific(Cond)))) + Cond->getType()->isIntOrIntVectorTy(1)) { + // A = sext i1 Cond; B = sext (not (i1 Cond)) + if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return Cond; + + // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond))) + // TODO: The one-use checks are unnecessary or misplaced. If the caller + // checked for uses on logic ops/casts, that should be enough to + // make this transform worthwhile. + if (match(B, m_OneUse(m_Not(m_Value(NotB))))) { + NotB = peekThroughBitcast(NotB, true); + if (match(NotB, m_SExt(m_Specific(Cond)))) + return Cond; + } } // All scalar (and most vector) possibilities should be handled now. @@ -2569,7 +2602,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (I.getType()->isIntOrIntVectorTy(1)) { + Type *Ty = I.getType(); + if (Ty->isIntOrIntVectorTy(1)) { if (auto *SI0 = dyn_cast<SelectInst>(Op0)) { if (auto *I = foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false)) @@ -2602,7 +2636,16 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0 // The check for a 'not' op is for efficiency (if Y is known zero --> ~X). 
Value *Or = Builder.CreateOr(X, Y); - return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV)); + return BinaryOperator::CreateXor(Or, ConstantInt::get(Ty, *CV)); + } + + // If the operands have no common bits set: + // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1) + if (match(&I, + m_c_Or(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), m_Deferred(X))) && + haveNoCommonBitsSet(Op0, Op1, DL)) { + Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1)); + return BinaryOperator::CreateMul(X, IncrementY); } // (A & C) | (B & D) @@ -2635,14 +2678,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // iff (C0 & C1) == 0 and (X & ~C0) == 0 if (match(A, m_c_Or(m_Value(X), m_Specific(B))) && MaskedValueIsZero(X, ~*C0, 0, &I)) { - Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); return BinaryOperator::CreateAnd(A, C01); } // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1) // iff (C0 & C1) == 0 and (X & ~C1) == 0 if (match(B, m_c_Or(m_Value(X), m_Specific(A))) && MaskedValueIsZero(X, ~*C1, 0, &I)) { - Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); return BinaryOperator::CreateAnd(B, C01); } // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1) @@ -2652,7 +2695,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { match(B, m_Or(m_Specific(X), m_APInt(C3))) && (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) { Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield"); - Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + Constant *C01 = ConstantInt::get(Ty, *C0 | *C1); return BinaryOperator::CreateAnd(Or, C01); } } @@ -2788,13 +2831,20 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) return CastedOr; + if (Instruction *Sel = foldBinopOfSextBoolToSelect(I)) + return Sel; + // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>. + // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold + // with binop identity constant. But creating a select with non-constant + // arm may not be reversible due to poison semantics. Is that a good + // canonicalization? if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && A->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op1); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && A->getType()->isIntOrIntVectorTy(1)) - return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op0); // Note: If we've gotten to the point of visiting the outer OR, then the // inner one couldn't be simplified. If it was a constant, then it won't @@ -2826,7 +2876,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X. 
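The visitOr change above also folds or (mul X, Y), X into mul X, (Y + 1) when the two operands have no common bits set: with disjoint bits the or is the same as an add, and mul X, Y plus X factors to X * (Y + 1). A quick exhaustive check of that reasoning in 8-bit arithmetic (standalone sketch, not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 256; ++X)
    for (uint32_t Y = 0; Y < 256; ++Y) {
      uint8_t Mul = (uint8_t)(X * Y);       // mul X, Y in i8
      if ((Mul & X) != 0)                   // fold only applies with no common bits set
        continue;
      // or (mul X, Y), X == add (mul X, Y), X == mul X, (Y + 1)
      assert((uint8_t)(Mul | X) == (uint8_t)(Mul + X));
      assert((uint8_t)(Mul + X) == (uint8_t)(X * (Y + 1)));
    }
  return 0;
}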
{ Value *X, *Y; - Type *Ty = I.getType(); if (match(&I, m_c_Or(m_OneUse(m_AShr( m_NSWSub(m_Value(Y), m_Value(X)), m_SpecificInt(Ty->getScalarSizeInBits() - 1))), @@ -2876,7 +2925,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()), m_Shl(m_One(), m_Deferred(X)))) && match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) { - Type *Ty = X->getType(); Value *Sub = Builder.CreateSub( ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X); return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub); @@ -3601,6 +3649,14 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A)))) return BinaryOperator::CreateOr(A, B); + // (~A | B) ^ A --> ~(A & B) + if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B))))) + return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B)); + + // A ^ (~A | B) --> ~(A & B) + if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B))))) + return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B)); + // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. // TODO: Loosen one-use restriction if common operand is a constant. Value *D; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bfa7bfa2290a..7da2669e1d13 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2641,7 +2641,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { ArgNo++; } - assert(ArgNo == Call.arg_size() && "sanity check"); + assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly."); if (!ArgNos.empty()) { AttributeList AS = Call.getAttributes(); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index ca87477c5d81..33f217659c01 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2771,7 +2771,7 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) { if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))), m_Value(Y), m_ConstantInt(IndexC)))) && DestTy->isIntegerTy() && X->getType() == DestTy && - isDesirableIntType(BitWidth)) { + Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) { // Adjust for big endian - the LSBs are at the high index. if (DL.isBigEndian()) IndexC = SrcVTy->getNumElements() - 1 - IndexC; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7a9e177f19da..ed53b88aed61 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -1894,23 +1895,6 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1)))); } - // (X & C2) == 0 -> (trunc X) >= 0 - // (X & C2) != 0 -> (trunc X) < 0 - // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. 
- const APInt *C2; - if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) { - int32_t ExactLogBase2 = C2->exactLogBase2(); - if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { - Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); - if (auto *AndVTy = dyn_cast<VectorType>(And->getType())) - NTy = VectorType::get(NTy, AndVTy->getElementCount()); - Value *Trunc = Builder.CreateTrunc(X, NTy); - auto NewPred = - Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; - return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); - } - } - return nullptr; } @@ -2803,7 +2787,8 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS, PredB, cast<Constant>(RHS2)); if (!FlippedStrictness) return false; - assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check"); + assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && + "basic correctness failure"); RHS2 = FlippedStrictness->second; // And kind-of perform the result swap. std::swap(Less, Greater); @@ -4614,7 +4599,7 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) { static Instruction *foldICmpWithTrunc(ICmpInst &ICmp, InstCombiner::BuilderTy &Builder) { - const ICmpInst::Predicate Pred = ICmp.getPredicate(); + ICmpInst::Predicate Pred = ICmp.getPredicate(); Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1); // Try to canonicalize trunc + compare-to-constant into a mask + cmp. @@ -4624,41 +4609,31 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp, if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C))) return nullptr; + // This matches patterns corresponding to tests of the signbit as well as: + // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) + // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) + APInt Mask; + if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) { + Value *And = Builder.CreateAnd(X, Mask); + Constant *Zero = ConstantInt::getNullValue(X->getType()); + return new ICmpInst(Pred, And, Zero); + } + unsigned SrcBits = X->getType()->getScalarSizeInBits(); - if (Pred == ICmpInst::ICMP_ULT) { - if (C->isPowerOf2()) { - // If C is a power-of-2 (one set bit): - // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) - Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits)); - Value *And = Builder.CreateAnd(X, MaskC); - Constant *Zero = ConstantInt::getNullValue(X->getType()); - return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero); - } + if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2()) { // If C is a negative power-of-2 (high-bit mask): // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?) - if (C->isNegatedPowerOf2()) { - Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits)); - Value *And = Builder.CreateAnd(X, MaskC); - return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC); - } + Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits)); + Value *And = Builder.CreateAnd(X, MaskC); + return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC); } - if (Pred == ICmpInst::ICMP_UGT) { - // If C is a low-bit-mask (C+1 is a power-of-2): - // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?) 
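The rewritten foldICmpWithTrunc above routes these patterns through decomposeBitTestICmp, but the underlying equivalences are the same ones spelled out in the removed code: for a trunc from a wider type, (trunc X) u< C with a power-of-two C becomes (X & zext(-C)) == 0, and (trunc X) u> C with a low-bit-mask C becomes (X & zext(~C)) != 0. An exhaustive check for an i16-to-i8 trunc and two sample constants (standalone sketch, not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    uint8_t T = (uint8_t)X;                    // trunc i16 X to i8
    // C = 16 is a power of two:  (trunc X) u< 16  <=>  (X & zext(-16 as i8)) == 0
    uint16_t MaskLT = (uint16_t)(uint8_t)-16;  // 0x00F0
    assert((T < 16) == ((X & MaskLT) == 0));
    // C = 15 is a low-bit mask:  (trunc X) u> 15  <=>  (X & zext(~15 as i8)) != 0
    uint16_t MaskGT = (uint16_t)(uint8_t)~15;  // also 0x00F0 for these constants
    assert((T > 15) == ((X & MaskGT) != 0));
  }
  return 0;
}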
- if (C->isMask()) { - Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits)); - Value *And = Builder.CreateAnd(X, MaskC); - Constant *Zero = ConstantInt::getNullValue(X->getType()); - return new ICmpInst(ICmpInst::ICMP_NE, And, Zero); - } + if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2()) { // If C is not-of-power-of-2 (one clear bit): // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?) - if ((~*C).isPowerOf2()) { - Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits)); - Value *And = Builder.CreateAnd(X, MaskC); - return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC); - } + Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits)); + Value *And = Builder.CreateAnd(X, MaskC); + return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC); } return nullptr; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 72e1b21e8d49..20c75188ec9f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -319,6 +319,7 @@ private: Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt); Instruction *foldCastedBitwiseLogic(BinaryOperator &I); + Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I); Instruction *narrowBinOp(TruncInst &Trunc); Instruction *narrowMaskedBinOp(BinaryOperator &And); Instruction *narrowMathIfNoOverflow(BinaryOperator &I); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index 7dc516c6fdc3..42ba4a34a5a9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -403,7 +403,7 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { NonNegatedOps.emplace_back(Op); // Just record which operand that was. } assert((NegatedOps.size() + NonNegatedOps.size()) == 2 && - "Internal consistency sanity check."); + "Internal consistency check failed."); // Did we manage to sink negation into both of the operands? if (NegatedOps.size() == 2) // Then we get to keep the `add`! return Builder.CreateAdd(NegatedOps[0], NegatedOps[1], diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 4a1e82ae9c1d..518d3952dce5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -246,12 +246,16 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp, static unsigned getSelectFoldableOperands(BinaryOperator *I) { switch (I->getOpcode()) { case Instruction::Add: + case Instruction::FAdd: case Instruction::Mul: + case Instruction::FMul: case Instruction::And: case Instruction::Or: case Instruction::Xor: return 3; // Can fold through either operand. case Instruction::Sub: // Can only fold on the amount subtracted. + case Instruction::FSub: + case Instruction::FDiv: // Can only fold on the divisor amount. case Instruction::Shl: // Can only fold on the shift amount. 
case Instruction::LShr: case Instruction::AShr: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 47b6dcb67a78..1f81624f79e7 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -967,6 +967,29 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { return nullptr; } +/// A binop with a constant operand and a sign-extended boolean operand may be +/// converted into a select of constants by applying the binary operation to +/// the constant with the two possible values of the extended boolean (0 or -1). +Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { + // TODO: Handle non-commutative binop (constant is operand 0). + // TODO: Handle zext. + // TODO: Peek through 'not' of cast. + Value *BO0 = BO.getOperand(0); + Value *BO1 = BO.getOperand(1); + Value *X; + Constant *C; + if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) || + !X->getType()->isIntOrIntVectorTy(1)) + return nullptr; + + // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) + Constant *Ones = ConstantInt::getAllOnesValue(BO.getType()); + Constant *Zero = ConstantInt::getNullValue(BO.getType()); + Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C); + Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C); + return SelectInst::Create(X, TVal, FVal); +} + static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast<CastInst>(&I)) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b56329ad76ae..bd2dc8d639fc 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file is a part of AddressSanitizer, an address sanity checker. +// This file is a part of AddressSanitizer, an address basic correctness +// checker. // Details of the algorithm: // https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm // diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 62c265e40dab..8d3bc1383e96 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// // /// \file -/// This file is a part of HWAddressSanitizer, an address sanity checker -/// based on tagged addressing. +/// This file is a part of HWAddressSanitizer, an address basic correctness +/// checker based on tagged addressing. 
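The new foldBinopOfSextBoolToSelect helper added in InstructionCombining.cpp above rewrites a binop of a sign-extended i1 and an immediate constant as a select between the two constant results, since the extended boolean can only be -1 or 0. A minimal check of that equivalence for a few opcodes, using a hypothetical constant (standalone sketch, not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  const int8_t C = 42;                      // hypothetical immediate constant
  for (int XBit = 0; XBit <= 1; ++XBit) {
    bool X = XBit;
    int8_t Sext = X ? -1 : 0;               // sext i1 X to i8
    // bo (sext i1 X), C  -->  select X, (bo -1, C), (bo 0, C)
    assert((int8_t)(Sext & C) == (X ? (int8_t)(-1 & C) : (int8_t)(0 & C)));
    assert((int8_t)(Sext ^ C) == (X ? (int8_t)(-1 ^ C) : (int8_t)(0 ^ C)));
    assert((int8_t)(Sext + C) == (X ? (int8_t)(-1 + C) : (int8_t)(0 + C)));
  }
  return 0;
}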
//===----------------------------------------------------------------------===// #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 36a66e096382..d1d3b8ffdf7a 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -64,10 +64,10 @@ cl::opt<bool> DoHashBasedCounterSplit( cl::desc("Rename counter variable of a comdat function based on cfg hash"), cl::init(true)); -cl::opt<bool> RuntimeCounterRelocation( - "runtime-counter-relocation", - cl::desc("Enable relocating counters at runtime."), - cl::init(false)); +cl::opt<bool> + RuntimeCounterRelocation("runtime-counter-relocation", + cl::desc("Enable relocating counters at runtime."), + cl::init(false)); cl::opt<bool> ValueProfileStaticAlloc( "vp-static-alloc", @@ -331,8 +331,9 @@ private: // Check whether the loop satisfies the basic conditions needed to perform // Counter Promotions. - bool isPromotionPossible(Loop *LP, - const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { + bool + isPromotionPossible(Loop *LP, + const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { // We can't insert into a catchswitch. if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { return isa<CatchSwitchInst>(Exit->getTerminator()); @@ -421,13 +422,13 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { } char InstrProfilingLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN( - InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, false) +INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof", + "Frontend instrumentation-based coverage lowering.", + false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END( - InstrProfilingLegacyPass, "instrprof", - "Frontend instrumentation-based coverage lowering.", false, false) +INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof", + "Frontend instrumentation-based coverage lowering.", false, + false) ModulePass * llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, @@ -634,13 +635,9 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { GlobalVariable *Name = Ind->getName(); uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); uint64_t Index = Ind->getIndex()->getZExtValue(); - auto It = ProfileDataMap.find(Name); - if (It == ProfileDataMap.end()) { - PerFunctionProfileData PD; - PD.NumValueSites[ValueKind] = Index + 1; - ProfileDataMap[Name] = PD; - } else if (It->second.NumValueSites[ValueKind] <= Index) - It->second.NumValueSites[ValueKind] = Index + 1; + auto &PD = ProfileDataMap[Name]; + PD.NumValueSites[ValueKind] = + std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1)); } void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { @@ -703,14 +700,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { LoadInst *LI = dyn_cast<LoadInst>(&I); if (!LI) { IRBuilder<> Builder(&I); - GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName()); + GlobalVariable *Bias = + M->getGlobalVariable(getInstrProfCounterBiasVarName()); if (!Bias) { // Compiler must define this variable when runtime counter relocation // is being used. Runtime has a weak external reference that is used // to check whether that's the case or not. 
- Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, - Constant::getNullValue(Int64Ty), - getInstrProfCounterBiasVarName()); + Bias = new GlobalVariable( + *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, + Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); Bias->setVisibility(GlobalVariable::HiddenVisibility); // A definition that's weak (linkonce_odr) without being in a COMDAT // section wouldn't lead to link errors, but it would lead to a dead @@ -839,8 +837,7 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { return false; // Use linker script magic to get data/cnts/name start/end. if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || - TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || - TT.isOSWindows()) + TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows()) return false; return true; @@ -849,13 +846,9 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { GlobalVariable *NamePtr = Inc->getName(); - auto It = ProfileDataMap.find(NamePtr); - PerFunctionProfileData PD; - if (It != ProfileDataMap.end()) { - if (It->second.RegionCounters) - return It->second.RegionCounters; - PD = It->second; - } + auto &PD = ProfileDataMap[NamePtr]; + if (PD.RegionCounters) + return PD.RegionCounters; // Match the linkage and visibility of the name global. Function *Fn = Inc->getParent()->getParent(); @@ -922,6 +915,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { CounterPtr->setAlignment(Align(8)); MaybeSetComdat(CounterPtr); CounterPtr->setLinkage(Linkage); + PD.RegionCounters = CounterPtr; auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); // Allocate statically the array of pointers to value profile nodes for @@ -1000,9 +994,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { MaybeSetComdat(Data); Data->setLinkage(Linkage); - PD.RegionCounters = CounterPtr; PD.DataVar = Data; - ProfileDataMap[NamePtr] = PD; // Mark the data variable as used so that it isn't stripped out. CompilerUsedVars.push_back(Data); @@ -1013,7 +1005,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { // Collect the referenced names to be used by emitNameData. ReferencedNames.push_back(NamePtr); - return CounterPtr; + return PD.RegionCounters; } void InstrProfiling::emitVNodes() { @@ -1078,8 +1070,8 @@ void InstrProfiling::emitNameData() { } auto &Ctx = M->getContext(); - auto *NamesVal = ConstantDataArray::getString( - Ctx, StringRef(CompressedNameStr), false); + auto *NamesVal = + ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false); NamesVar = new GlobalVariable(*M, NamesVal->getType(), true, GlobalValue::PrivateLinkage, NamesVal, getInstrProfNamesVarName()); diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index f98e39d751f4..180012198c42 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -110,7 +110,7 @@ namespace { /// the module. struct ThreadSanitizer { ThreadSanitizer() { - // Sanity check options and warn user. + // Check options and warn user. 
if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) { errs() << "warning: Option -tsan-compound-read-before-write has no effect " diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 74e4eb07b219..4921209f041b 100644 --- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -94,11 +94,9 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, return false; } else if (const auto *CS = dyn_cast<CallBase>(Inst)) { // For calls, just check the arguments (and not the callee operand). - for (auto OI = CS->arg_begin(), OE = CS->arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; + for (const Value *Op : CS->args()) if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; - } return false; } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Special-case stores, because we don't care about the stored value, just diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ca9567dc7ac8..a3fd97079b1d 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -52,6 +52,11 @@ using namespace llvm; #define DEBUG_TYPE "correlated-value-propagation" +static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned( + "canonicalize-icmp-predicates-to-unsigned", cl::init(true), cl::Hidden, + cl::desc("Enables canonicalization of signed relational predicates to " + "unsigned (e.g. sgt => ugt)")); + STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value"); STATISTIC(NumSelects, "Number of selects propagated"); @@ -64,7 +69,8 @@ STATISTIC(NumSDivSRemsNarrowed, STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivURemsNarrowed, "Number of udivs/urems whose width was decreased"); -STATISTIC(NumAShrs, "Number of ashr converted to lshr"); +STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr"); +STATISTIC(NumAShrsRemoved, "Number of ashr removed"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned"); @@ -297,6 +303,9 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) { } static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { + if (!CanonicalizeICmpPredicatesToUnsigned) + return false; + // Only for signed relational comparisons of scalar integers. if (Cmp->getType()->isVectorTy() || !Cmp->getOperand(0)->getType()->isIntegerTy()) @@ -376,13 +385,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, // ConstantFoldTerminator() as the underlying SwitchInst can be changed. SwitchInstProfUpdateWrapper SI(*I); - APInt Low = - APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits()); - APInt High = - APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits()); - - SwitchInst::CaseIt CI = SI->case_begin(); - for (auto CE = SI->case_end(); CI != CE;) { + for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); LazyValueInfo::Tristate State = LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, @@ -415,28 +418,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, break; } - // Get Lower/Upper bound from switch cases. 
- Low = APIntOps::smin(Case->getValue(), Low); - High = APIntOps::smax(Case->getValue(), High); - // Increment the case iterator since we didn't delete it. ++CI; } - - // Try to simplify default case as unreachable - if (CI == SI->case_end() && SI->getNumCases() != 0 && - !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) { - const ConstantRange SIRange = - LVI->getConstantRange(SI->getCondition(), SI); - - // If the numbered switch cases cover the entire range of the condition, - // then the default case is not reachable. - if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High && - SI->getNumCases() == High - Low + 1) { - createUnreachableSwitchDefault(SI, &DTU); - Changed = true; - } - } } if (Changed) @@ -688,7 +672,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { ArgNo++; } - assert(ArgNo == CB.arg_size() && "sanity check"); + assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly."); if (ArgNos.empty()) return Changed; @@ -954,10 +938,22 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; + ConstantRange LRange = LVI->getConstantRange(SDI->getOperand(0), SDI); + unsigned OrigWidth = SDI->getType()->getIntegerBitWidth(); + ConstantRange NegOneOrZero = + ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1)); + if (NegOneOrZero.contains(LRange)) { + // ashr of -1 or 0 never changes the value, so drop the whole instruction + ++NumAShrsRemoved; + SDI->replaceAllUsesWith(SDI->getOperand(0)); + SDI->eraseFromParent(); + return true; + } + if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) return false; - ++NumAShrs; + ++NumAShrsConverted; auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1), SDI->getName(), SDI); BO->setDebugLoc(SDI->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a8ec8bb97970..e0d3a6accadd 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -159,52 +159,22 @@ static cl::opt<unsigned> MemorySSAPathCheckLimit( cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)")); +// This flags allows or disallows DSE to optimize MemorySSA during its +// traversal. Note that DSE optimizing MemorySSA may impact other passes +// downstream of the DSE invocation and can lead to issues not being +// reproducible in isolation (i.e. when MemorySSA is built from scratch). In +// those cases, the flag can be used to check if DSE's MemorySSA optimizations +// impact follow-up passes. +static cl::opt<bool> + OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, + cl::desc("Allow DSE to optimize memory accesses.")); + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// using OverlapIntervalsTy = std::map<int64_t, int64_t>; using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>; -/// Does this instruction write some memory? This only returns true for things -/// that we can analyze with other helpers below. 
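The CorrelatedValuePropagation change above removes an ashr entirely when its left operand is known to lie in {-1, 0}, because an arithmetic shift right leaves both of those values unchanged for any shift amount. A quick check (standalone sketch, not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  // Assumes the usual arithmetic behaviour of >> on negative signed values.
  int32_t Vals[] = {-1, 0};
  for (int32_t V : Vals)
    for (unsigned Sh = 0; Sh < 31; ++Sh)
      assert((V >> Sh) == V);   // ashr of -1 or 0 never changes the value
  return 0;
}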
-static bool hasAnalyzableMemoryWrite(Instruction *I, - const TargetLibraryInfo &TLI) { - if (isa<StoreInst>(I)) - return true; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: - case Intrinsic::memcpy_element_unordered_atomic: - case Intrinsic::memmove_element_unordered_atomic: - case Intrinsic::memset_element_unordered_atomic: - case Intrinsic::init_trampoline: - case Intrinsic::lifetime_end: - case Intrinsic::masked_store: - return true; - } - } - if (auto *CB = dyn_cast<CallBase>(I)) { - LibFunc LF; - if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { - switch (LF) { - case LibFunc_strcpy: - case LibFunc_strncpy: - case LibFunc_strcat: - case LibFunc_strncat: - return true; - default: - return false; - } - } - } - return false; -} - /// If the value of this instruction and the memory it writes to is unused, may /// we delete this instruction? static bool isRemovable(Instruction *I) { @@ -214,7 +184,7 @@ static bool isRemovable(Instruction *I) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); + default: llvm_unreachable("Does not have LocForWrite"); case Intrinsic::lifetime_end: // Never remove dead lifetime_end's, e.g. because it is followed by a // free. @@ -296,6 +266,7 @@ enum OverwriteResult { OW_End, OW_PartialEarlierWithFullLater, OW_MaybePartial, + OW_None, OW_Unknown }; @@ -841,7 +812,7 @@ struct DSEState { /// Keep track of instructions (partly) overlapping with killing MemoryDefs per /// basic block. - DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs; + MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs; // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; @@ -889,6 +860,7 @@ struct DSEState { /// Return OW_MaybePartial if \p KillingI does not completely overwrite /// \p DeadI, but they both write to the same underlying object. In that /// case, use isPartialOverwrite to check if \p KillingI partially overwrites + /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined. OverwriteResult isOverwrite(const Instruction *KillingI, const Instruction *DeadI, @@ -951,8 +923,16 @@ struct DSEState { // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. - if (DeadUndObj != KillingUndObj) + if (DeadUndObj != KillingUndObj) { + // Non aliasing stores to different objects don't overlap. Note that + // if the killing store is known to overwrite whole object (out of + // bounds access overwrites whole object as well) then it is assumed to + // completely overwrite any store to the same object even if they don't + // actually alias (see next check). + if (AAR == AliasResult::NoAlias) + return OW_None; return OW_Unknown; + } // If the KillingI store is to a recognizable object, get its size. uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F); @@ -1006,9 +986,8 @@ struct DSEState { return OW_MaybePartial; } - // Can reach here only if accesses are known not to overlap. There is no - // dedicated code to indicate no overlap so signal "unknown". - return OW_Unknown; + // Can reach here only if accesses are known not to overlap. 
+ return OW_None; } bool isInvisibleToCallerAfterRet(const Value *V) { @@ -1304,6 +1283,15 @@ struct DSEState { Instruction *KillingI = KillingDef->getMemoryInst(); LLVM_DEBUG(dbgs() << " trying to get dominating access\n"); + // Only optimize defining access of KillingDef when directly starting at its + // defining access. The defining access also must only access KillingLoc. At + // the moment we only support instructions with a single write location, so + // it should be sufficient to disable optimizations for instructions that + // also read from memory. + bool CanOptimize = OptimizeMemorySSA && + KillingDef->getDefiningAccess() == StartAccess && + !KillingI->mayReadFromMemory(); + // Find the next clobbering Mod access for DefLoc, starting at StartAccess. Optional<MemoryLocation> CurrentLoc; for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) { @@ -1345,8 +1333,10 @@ struct DSEState { Instruction *CurrentI = CurrentDef->getMemoryInst(); if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj), - TLI)) + TLI)) { + CanOptimize = false; continue; + } // Before we try to remove anything, check for any extra throwing // instructions that block us from DSEing @@ -1380,15 +1370,13 @@ struct DSEState { return None; } - // If Current cannot be analyzed or is not removable, check the next - // candidate. - if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) - continue; - - // If Current does not have an analyzable write location, skip it + // If Current does not have an analyzable write location or is not + // removable, skip it. CurrentLoc = getLocForWriteEx(CurrentI); - if (!CurrentLoc) + if (!CurrentLoc || !isRemovable(CurrentI)) { + CanOptimize = false; continue; + } // AliasAnalysis does not account for loops. Limit elimination to // candidates for which we can guarantee they always store to the same @@ -1396,6 +1384,7 @@ struct DSEState { if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) { LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n"); WalkerStepLimit -= 1; + CanOptimize = false; continue; } @@ -1403,16 +1392,32 @@ struct DSEState { // If the killing def is a memory terminator (e.g. lifetime.end), check // the next candidate if the current Current does not write the same // underlying object as the terminator. - if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) + if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) { + CanOptimize = false; continue; + } } else { int64_t KillingOffset = 0; int64_t DeadOffset = 0; auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc, KillingOffset, DeadOffset); + if (CanOptimize) { + // CurrentDef is the earliest write clobber of KillingDef. Use it as + // optimized access. Do not optimize if CurrentDef is already the + // defining access of KillingDef. + if (CurrentDef != KillingDef->getDefiningAccess() && + (OR == OW_Complete || OR == OW_MaybePartial)) + KillingDef->setOptimized(CurrentDef); + + // Once a may-aliasing def is encountered do not set an optimized + // access. + if (OR != OW_None) + CanOptimize = false; + } + // If Current does not write to the same object as KillingDef, check // the next candidate. - if (OR == OW_Unknown) + if (OR == OW_Unknown || OR == OW_None) continue; else if (OR == OW_MaybePartial) { // If KillingDef only partially overwrites Current, check the next @@ -1421,6 +1426,7 @@ struct DSEState { // which are less likely to be removable in the end. if (PartialLimit <= 1) { WalkerStepLimit -= 1; + LLVM_DEBUG(dbgs() << " ... 
reached partial limit ... continue with next access\n"); continue; } PartialLimit -= 1; @@ -1922,7 +1928,14 @@ struct DSEState { if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) || !isRemovable(Def->getMemoryInst())) continue; - auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess()); + MemoryDef *UpperDef; + // To conserve compile-time, we avoid walking to the next clobbering def. + // Instead, we just try to get the optimized access, if it exists. DSE + // will try to optimize defs during the earlier traversal. + if (Def->isOptimized()) + UpperDef = dyn_cast<MemoryDef>(Def->getOptimized()); + else + UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess()); if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef)) continue; diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index ae2fe2767074..7001d330fce0 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1951,7 +1951,6 @@ bool IndVarSimplify::run(Loop *L) { // using it. if (!DisableLFTR) { BasicBlock *PreHeader = L->getLoopPreheader(); - BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1987,7 +1986,7 @@ bool IndVarSimplify::run(Loop *L) { // Avoid high cost expansions. Note: This heuristic is questionable in // that our definition of "high cost" is not exactly principled. if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, - TTI, PreHeaderBR)) + TTI, PreHeader->getTerminator())) continue; // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index bf714d167670..6f97f3e93123 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -486,7 +486,7 @@ bool LoopInvariantCodeMotion::runOnLoop( // Check that neither this loop nor its parent have had LCSSA broken. LICM is // specifically moving instructions across the loop boundary and so it is - // especially in need of sanity checking here. + // especially in need of basic functional correctness checking here. assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); @@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter { bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; + bool CanInsertStoresInExitBlocks; // We're about to add a use of V in a loop exit block. 
Insert an LCSSA phi // (if legal) if doing so would add an out-of-loop use to an instruction @@ -1886,12 +1887,13 @@ public: SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, - ICFLoopSafetyInfo &SafetyInfo) + ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), - SafetyInfo(SafetyInfo) {} + SafetyInfo(SafetyInfo), + CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {} bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction *> &) const override { @@ -1903,7 +1905,7 @@ public: return PointerMustAliases.count(Ptr); } - void doExtraRewritesBeforeFinalDeletion() override { + void insertStoresInLoopExitBlocks() { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader @@ -1937,10 +1939,21 @@ public: } } + void doExtraRewritesBeforeFinalDeletion() override { + if (CanInsertStoresInExitBlocks) + insertStoresInLoopExitBlocks(); + } + void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); MSSAU->removeMemoryAccess(I); } + + bool shouldDelete(Instruction *I) const override { + if (isa<StoreInst>(I)) + return CanInsertStoresInExitBlocks; + return true; + } }; bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L, @@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars( bool DereferenceableInPH = false; bool SafeToInsertStore = false; + bool FoundLoadToPromote = false; SmallVector<Instruction *, 64> LoopUses; @@ -2067,16 +2081,11 @@ bool llvm::promoteLoopAccessesToScalars( IsKnownThreadLocalObject = !isa<AllocaInst>(Object); } - // Check that all of the pointers in the alias set have the same type. We - // cannot (yet) promote a memory location that is loaded and stored in + // Check that all accesses to pointers in the aliass set use the same type. + // We cannot (yet) promote a memory location that is loaded and stored in // different sizes. While we are at it, collect alignment and AA info. + Type *AccessTy = nullptr; for (Value *ASIV : PointerMustAliases) { - // Check that all of the pointers in the alias set have the same type. We - // cannot (yet) promote a memory location that is loaded and stored in - // different sizes. - if (SomePtr->getType() != ASIV->getType()) - return false; - for (User *U : ASIV->users()) { // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast<Instruction>(U); @@ -2091,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars( SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); + FoundLoadToPromote = true; Align InstAlignment = Load->getAlign(); @@ -2153,6 +2163,11 @@ bool llvm::promoteLoopAccessesToScalars( } else return false; // Not a load or store. + if (!AccessTy) + AccessTy = getLoadStoreType(UI); + else if (AccessTy != getLoadStoreType(UI)) + return false; + // Merge the AA tags. if (LoopUses.empty()) { // On the first load/store, just take its AA tags. @@ -2175,9 +2190,7 @@ bool llvm::promoteLoopAccessesToScalars( // If we're inserting an atomic load in the preheader, we must be able to // lower it. 
We're only guaranteed to be able to lower naturally aligned // atomics. - auto *SomePtrElemType = SomePtr->getType()->getPointerElementType(); - if (SawUnorderedAtomic && - Alignment < MDL.getTypeStoreSize(SomePtrElemType)) + if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy)) return false; // If we couldn't prove we can hoist the load, bail. @@ -2199,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars( } } - // If we've still failed to prove we can sink the store, give up. - if (!SafeToInsertStore) + // If we've still failed to prove we can sink the store, hoist the load + // only, if possible. + if (!SafeToInsertStore && !FoundLoadToPromote) + // If we cannot hoist the load either, give up. return false; - // Otherwise, this is safe to promote, lets do it! - LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr - << '\n'); + // Lets do the promotion! + if (SafeToInsertStore) + LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr + << '\n'); + else + LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr + << '\n'); + ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) @@ -2224,13 +2244,14 @@ bool llvm::promoteLoopAccessesToScalars( SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, - Alignment, SawUnorderedAtomic, AATags, *SafetyInfo); + Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, + SafeToInsertStore); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst( - SomePtr->getType()->getPointerElementType(), SomePtr, - SomePtr->getName() + ".promoted", Preheader->getTerminator()); + AccessTy, SomePtr, SomePtr->getName() + ".promoted", + Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 3df4cfe8e4c1..6c783848432b 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -49,9 +49,17 @@ void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>::printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { - for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) { - auto *P = LoopPasses[Idx].get(); - P->printPipeline(OS, MapClassName2PassName); + assert(LoopPasses.size() + LoopNestPasses.size() == IsLoopNestPass.size()); + + unsigned IdxLP = 0, IdxLNP = 0; + for (unsigned Idx = 0, Size = IsLoopNestPass.size(); Idx != Size; ++Idx) { + if (IsLoopNestPass[Idx]) { + auto *P = LoopNestPasses[IdxLNP++].get(); + P->printPipeline(OS, MapClassName2PassName); + } else { + auto *P = LoopPasses[IdxLP++].get(); + P->printPipeline(OS, MapClassName2PassName); + } if (Idx + 1 < Size) OS << ","; } diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a87843d658a9..728d63fe2847 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -256,8 +256,8 @@ private: } } - // Sanity check: amount of dead and live loop blocks should match the total - // number of blocks in loop. 
+ // Amount of dead and live loop blocks should match the total number of + // blocks in loop. assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() && "Malformed block sets?"); @@ -305,7 +305,6 @@ private: BlocksInLoopAfterFolding.insert(BB); } - // Sanity check: header must be in loop. assert(BlocksInLoopAfterFolding.count(L.getHeader()) && "Header not in loop?"); assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() && diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 67702520511b..39c8b65968aa 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -806,28 +806,27 @@ static Optional<unsigned> shouldFullUnroll( ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues, const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP) { + assert(FullUnrollTripCount && "should be non-zero!"); - if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { - // When computing the unrolled size, note that BEInsns are not replicated - // like the rest of the loop body. - if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { - return FullUnrollTripCount; + if (FullUnrollTripCount > UP.FullUnrollMaxCount) + return None; - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. - if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100, - UP.MaxIterationsCountToAnalyze)) { - unsigned Boost = - getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); - if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - return FullUnrollTripCount; - } - } - } + // When computing the unrolled size, note that BEInsns are not replicated + // like the rest of the loop body. + if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) + return FullUnrollTripCount; + + // The loop isn't that small, but we still can fully unroll it if that + // helps to remove a significant number of instructions. + // To check that, run additional analysis on the loop. + if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost( + L, FullUnrollTripCount, DT, SE, EphValues, TTI, + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { + unsigned Boost = + getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); + if (Cost->UnrolledCost < UP.Threshold * Boost / 100) + return FullUnrollTripCount; } return None; } @@ -837,51 +836,48 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, const UnrollCostEstimator UCE, const TargetTransformInfo::UnrollingPreferences &UP) { + if (!TripCount) + return None; + + if (!UP.Partial) { + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + return 0; + } unsigned count = UP.Count; - if (TripCount) { - if (!UP.Partial) { - LLVM_DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - count = 0; - return count; - } - if (count == 0) - count = TripCount; - if (UP.PartialThreshold != NoThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. 
- if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) - count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / - (LoopSize - UP.BEInsns); - if (count > UP.MaxCount) - count = UP.MaxCount; - while (count != 0 && TripCount % count != 0) - count--; - if (UP.AllowRemainder && count <= 1) { - // If there is no Count that is modulo of TripCount, set Count to - // largest power-of-two factor that satisfies the threshold limit. - // As we'll create fixup loop, do the type of unrolling only if - // remainder loop is allowed. - count = UP.DefaultUnrollRuntimeCount; - while (count != 0 && - UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) - count >>= 1; - } - if (count < 2) { - count = 0; - } - } else { - count = TripCount; - } + if (count == 0) + count = TripCount; + if (UP.PartialThreshold != NoThreshold) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); if (count > UP.MaxCount) count = UP.MaxCount; - - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); - - return count; + while (count != 0 && TripCount % count != 0) + count--; + if (UP.AllowRemainder && count <= 1) { + // If there is no Count that is modulo of TripCount, set Count to + // largest power-of-two factor that satisfies the threshold limit. + // As we'll create fixup loop, do the type of unrolling only if + // remainder loop is allowed. + count = UP.DefaultUnrollRuntimeCount; + while (count != 0 && + UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) + count >>= 1; + } + if (count < 2) { + count = 0; + } + } else { + count = TripCount; } + if (count > UP.MaxCount) + count = UP.MaxCount; - // if didn't return until here, should continue to other priorties - return None; + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); + + return count; } // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. @@ -900,7 +896,6 @@ bool llvm::computeUnrollCount( TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { UnrollCostEstimator UCE(*L, LoopSize); - Optional<unsigned> UnrollFactor; const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; const bool PragmaFullUnroll = hasUnrollFullPragma(L); @@ -926,9 +921,8 @@ bool llvm::computeUnrollCount( // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. // 2nd priority is unroll count set by pragma. - UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP); - - if (UnrollFactor) { + if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, + UCE, UP)) { UP.Count = *UnrollFactor; if (UserUnrollCount || (PragmaCount > 0)) { @@ -948,11 +942,20 @@ bool llvm::computeUnrollCount( } } - // 3rd priority is full unroll count. - // Full unroll makes sense only when TripCount or its upper bound could be - // statically calculated. - // Also we need to check if we exceed FullUnrollMaxCount. + // 3rd priority is exact full unrolling. This will eliminate all copies + // of some exit test. + UP.Count = 0; + if (TripCount) { + UP.Count = TripCount; + if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, + TripCount, UCE, UP)) { + UP.Count = *UnrollFactor; + UseUpperBound = false; + return ExplicitUnroll; + } + } + // 4th priority is bounded unrolling. 
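The restructured shouldPartialUnroll above keeps the earlier heuristic: start from the trip count, shrink the count so the unrolled size stays under the partial threshold, cap it at MaxCount, and then reduce it until it evenly divides the trip count. A worked sketch with hypothetical numbers; the size estimate (LoopSize - BEInsns) * Count + BEInsns is assumed here and is not part of the commit:

#include <algorithm>
#include <cstdio>

int main() {
  // Hypothetical loop: 30 instructions, 2 of them backedge overhead (BEInsns),
  // partial threshold 150, constant trip count 10, MaxCount 8.
  unsigned LoopSize = 30, BEInsns = 2, PartialThreshold = 150;
  unsigned TripCount = 10, MaxCount = 8;
  auto UnrolledSize = [&](unsigned C) { return (LoopSize - BEInsns) * C + BEInsns; };

  unsigned Count = TripCount;                    // start from the full trip count
  if (UnrolledSize(Count) > PartialThreshold)    // 282 > 150, too large to fully replicate
    Count = (std::max(PartialThreshold, BEInsns + 1) - BEInsns) /
            (LoopSize - BEInsns);                // 148 / 28 = 5
  Count = std::min(Count, MaxCount);
  while (Count != 0 && TripCount % Count != 0)   // keep the count a divisor of the trip count
    --Count;                                     // 5 divides 10, so it stays 5
  printf("partial unroll count = %u\n", Count);  // prints 5
  return 0;
}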
// We can unroll by the upper bound amount if it's generally allowed or if // we know that the loop is executed either the upper bound or zero times. // (MaxOrZero unrolling keeps only the first loop test, so the number of @@ -961,37 +964,21 @@ bool llvm::computeUnrollCount( // number of loop tests goes up which may end up being worse on targets with // constrained branch predictor resources so is controlled by an option.) // In addition we only unroll small upper bounds. - unsigned FullUnrollMaxTripCount = MaxTripCount; - if (!(UP.UpperBound || MaxOrZero) || - FullUnrollMaxTripCount > UnrollMaxUpperBound) - FullUnrollMaxTripCount = 0; - - // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only - // compute the former when the latter is zero. - unsigned ExactTripCount = TripCount; - assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && - "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); - - unsigned FullUnrollTripCount = - ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; - UP.Count = FullUnrollTripCount; - - UnrollFactor = - shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); - - // if shouldFullUnroll can do the unrolling, some side parameteres should be - // set - if (UnrollFactor) { - UP.Count = *UnrollFactor; - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - TripCount = FullUnrollTripCount; - TripMultiple = UP.UpperBound ? 1 : TripMultiple; - return ExplicitUnroll; - } else { - UP.Count = FullUnrollTripCount; + // Note that the cost of bounded unrolling is always strictly greater than + // cost of exact full unrolling. As such, if we have an exact count and + // found it unprofitable, we'll never chose to bounded unroll. + if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) && + MaxTripCount <= UnrollMaxUpperBound) { + UP.Count = MaxTripCount; + if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, + MaxTripCount, UCE, UP)) { + UP.Count = *UnrollFactor; + UseUpperBound = true; + return ExplicitUnroll; + } } - // 4th priority is loop peeling. + // 5th priority is loop peeling. computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; @@ -1004,11 +991,9 @@ bool llvm::computeUnrollCount( if (TripCount) UP.Partial |= ExplicitUnroll; - // 5th priority is partial unrolling. + // 6th priority is partial unrolling. // Try partial unroll only when TripCount could be statically calculated. - UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); - - if (UnrollFactor) { + if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) { UP.Count = *UnrollFactor; if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && @@ -1049,7 +1034,7 @@ bool llvm::computeUnrollCount( "because loop has a runtime trip count."; }); - // 6th priority is runtime unrolling. + // 7th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. if (hasRuntimeUnrollDisablePragma(L)) { UP.Count = 0; diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index b0fb8daaba8f..c354fa177a60 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -494,7 +494,7 @@ static bool LinearizeExprTree(Instruction *I, SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order. #ifndef NDEBUG - SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme. 
+ SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme. #endif while (!Worklist.empty()) { std::pair<Instruction*, APInt> P = Worklist.pop_back_val(); @@ -2313,11 +2313,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { MadeChange |= LinearizeExprTree(I, Tree); SmallVector<ValueEntry, 8> Ops; Ops.reserve(Tree.size()); - for (unsigned i = 0, e = Tree.size(); i != e; ++i) { - RepeatedValue E = Tree[i]; - Ops.append(E.second.getZExtValue(), - ValueEntry(getRank(E.first), E.first)); - } + for (const RepeatedValue &E : Tree) + Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first)); LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 86d3620c312e..3799d2dd1cf2 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -227,8 +227,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, unsigned IterCnt = 0; (void)IterCnt; while (LocalChange) { - assert(IterCnt++ < 1000 && - "Sanity: iterative simplification didn't converge!"); + assert(IterCnt++ < 1000 && "Iterative simplification didn't converge!"); LocalChange = false; // Loop over all of the basic blocks and remove them if they are unneeded. diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 6469c899feea..d6d6b1a7fa09 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -235,22 +235,26 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, // These dominator edges will be redirected from Pred. std::vector<DominatorTree::UpdateType> Updates; if (DTU) { - SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB)); + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenSuccs; SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB), succ_end(PredBB)); - Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1); + Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1); // Add insert edges first. Experimentally, for the particular case of two // blocks that can be merged, with a single successor and single predecessor // respectively, it is beneficial to have all insert updates first. Deleting // edges first may lead to unreachable blocks, followed by inserting edges // making the blocks reachable again. Such DT updates lead to high compile // times. We add inserts before deletes here to reduce compile time. - for (BasicBlock *SuccOfBB : SuccsOfBB) + for (BasicBlock *SuccOfBB : successors(BB)) // This successor of BB may already be a PredBB's successor. if (!SuccsOfPredBB.contains(SuccOfBB)) - Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); - for (BasicBlock *SuccOfBB : SuccsOfBB) - Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); + SeenSuccs.clear(); + for (BasicBlock *SuccOfBB : successors(BB)) + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); } @@ -804,14 +808,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt, if (DTU) { SmallVector<DominatorTree::UpdateType, 8> Updates; // Old dominates New. 
New node dominates all other nodes dominated by Old. - SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New), - succ_end(New)); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld; Updates.push_back({DominatorTree::Insert, Old, New}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size()); - for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) { - Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld}); - Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld}); - } + Updates.reserve(Updates.size() + 2 * succ_size(New)); + for (BasicBlock *SuccessorOfOld : successors(New)) + if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) { + Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld}); + Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld}); + } DTU->applyUpdates(Updates); } else if (DT) @@ -870,14 +874,14 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, SmallVector<DominatorTree::UpdateType, 8> DTUpdates; // New dominates Old. The predecessor nodes of the Old node dominate // New node. - SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New), - pred_end(New)); + SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld; DTUpdates.push_back({DominatorTree::Insert, New, Old}); - DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size()); - for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) { - DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New}); - DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old}); - } + DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New)); + for (BasicBlock *PredecessorOfOld : predecessors(New)) + if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) { + DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New}); + DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old}); + } DTU->applyUpdates(DTUpdates); @@ -910,13 +914,14 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, } else { // Split block expects NewBB to have a non-empty set of predecessors. 
SmallVector<DominatorTree::UpdateType, 8> Updates; - SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end()); + SmallPtrSet<BasicBlock *, 8> UniquePreds; Updates.push_back({DominatorTree::Insert, NewBB, OldBB}); - Updates.reserve(Updates.size() + 2 * UniquePreds.size()); - for (auto *UniquePred : UniquePreds) { - Updates.push_back({DominatorTree::Insert, UniquePred, NewBB}); - Updates.push_back({DominatorTree::Delete, UniquePred, OldBB}); - } + Updates.reserve(Updates.size() + 2 * Preds.size()); + for (auto *Pred : Preds) + if (UniquePreds.insert(Pred).second) { + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::Delete, Pred, OldBB}); + } DTU->applyUpdates(Updates); } } else if (DT) { @@ -1376,14 +1381,14 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore, BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); if (DTU) { - SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail), - succ_end(Tail)); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead; Updates.push_back({DominatorTree::Insert, Head, Tail}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size()); - for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) { - Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead}); - Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead}); - } + Updates.reserve(Updates.size() + 2 * succ_size(Tail)); + for (BasicBlock *SuccessorOfHead : successors(Tail)) + if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) { + Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead}); + Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead}); + } } Instruction *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 957935398972..580cfd80141e 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -452,18 +452,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_mempcpy: case LibFunc_memccpy: + Changed |= setWillReturn(F); + LLVM_FALLTHROUGH; + case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); Changed |= setOnlyAccessesArgMemory(F); - Changed |= setWillReturn(F); Changed |= setDoesNotAlias(F, 0); Changed |= setOnlyWritesMemory(F, 0); Changed |= setDoesNotAlias(F, 1); Changed |= setDoesNotCapture(F, 1); Changed |= setOnlyReadsMemory(F, 1); return Changed; - case LibFunc_memcpy_chk: - Changed |= setDoesNotThrow(F); - return Changed; case LibFunc_memalign: Changed |= setOnlyAccessesInaccessibleMemory(F); Changed |= setRetNoUndef(F); @@ -1018,9 +1017,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); return Changed; - // TODO: add LibFunc entries for: - // case LibFunc_memset_pattern4: - // case LibFunc_memset_pattern8: + case LibFunc_memset_pattern4: + case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); @@ -1029,10 +1027,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memset: - Changed |= setOnlyAccessesArgMemory(F); Changed |= setWillReturn(F); - Changed 
|= setDoesNotThrow(F); + LLVM_FALLTHROUGH; + case LibFunc_memset_chk: + Changed |= setOnlyAccessesArgMemory(F); Changed |= setOnlyWritesMemory(F, 0); + Changed |= setDoesNotThrow(F); return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp index 200deca4b317..57c273a0e3c5 100644 --- a/llvm/lib/Transforms/Utils/CloneModule.cpp +++ b/llvm/lib/Transforms/Utils/CloneModule.cpp @@ -135,10 +135,18 @@ std::unique_ptr<Module> llvm::CloneModule( // Similarly, copy over function bodies now... // for (const Function &I : M) { - if (I.isDeclaration()) + Function *F = cast<Function>(VMap[&I]); + + if (I.isDeclaration()) { + // Copy over metadata for declarations since we're not doing it below in + // CloneFunctionInto(). + SmallVector<std::pair<unsigned, MDNode *>, 1> MDs; + I.getAllMetadata(MDs); + for (auto MD : MDs) + F->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); continue; + } - Function *F = cast<Function>(VMap[&I]); if (!ShouldCloneDefinition(&I)) { // Skip after setting the correct linkage for an external reference. F->setLinkage(GlobalValue::ExternalLinkage); diff --git a/llvm/lib/Transforms/Utils/GuardUtils.cpp b/llvm/lib/Transforms/Utils/GuardUtils.cpp index 4dbcbf80d3da..7c310f16d46e 100644 --- a/llvm/lib/Transforms/Utils/GuardUtils.cpp +++ b/llvm/lib/Transforms/Utils/GuardUtils.cpp @@ -74,7 +74,7 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic, {}, {}, nullptr, "widenable_cond"); CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC, "exiplicit_guard_cond")); - assert(isWidenableBranch(CheckBI) && "sanity check"); + assert(isWidenableBranch(CheckBI) && "Branch must be widenable."); } } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f4776589910f..997667810580 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1218,10 +1218,9 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { if (!RI || !isa<CallBase>(RI->getOperand(0))) continue; auto *RetVal = cast<CallBase>(RI->getOperand(0)); - // Sanity check that the cloned RetVal exists and is a call, otherwise we - // cannot add the attributes on the cloned RetVal. - // Simplification during inlining could have transformed the cloned - // instruction. + // Check that the cloned RetVal exists and is a call, otherwise we cannot + // add the attributes on the cloned RetVal. Simplification during inlining + // could have transformed the cloned instruction. 
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal)); if (!NewRetVal) continue; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 74ab37fadf36..ec926b1f5a94 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -529,8 +529,8 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive( std::function<void(Value *)> AboutToDeleteCallback) { unsigned S = 0, E = DeadInsts.size(), Alive = 0; for (; S != E; ++S) { - auto *I = cast<Instruction>(DeadInsts[S]); - if (!isInstructionTriviallyDead(I)) { + auto *I = dyn_cast<Instruction>(DeadInsts[S]); + if (!I || !isInstructionTriviallyDead(I)) { DeadInsts[S] = nullptr; ++Alive; } @@ -760,15 +760,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { - SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB), - pred_end(PredBB)); - Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 2> SeenPreds; + Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) // This predecessor of PredBB may already have DestBB as a successor. if (PredOfPredBB != PredBB) - Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) - Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); + SeenPreds.clear(); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } @@ -1096,16 +1099,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, SmallVector<DominatorTree::UpdateType, 32> Updates; if (DTU) { + // To avoid processing the same predecessor more than once. + SmallPtrSet<BasicBlock *, 8> SeenPreds; // All predecessors of BB will be moved to Succ. - SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB)); SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ)); - Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1); - for (auto *PredOfBB : PredsOfBB) + Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1); + for (auto *PredOfBB : predecessors(BB)) // This predecessor of BB may already have Succ as a successor. 
if (!PredsOfSucc.contains(PredOfBB)) - Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); - for (auto *PredOfBB : PredsOfBB) - Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + SeenPreds.clear(); + for (auto *PredOfBB : predecessors(BB)) + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); Updates.push_back({DominatorTree::Delete, BB, Succ}); } @@ -2190,26 +2197,6 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } -void llvm::createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - auto *BB = Switch->getParent(); - auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); - BasicBlock *NewDefaultBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), - OrigDefaultBlock); - new UnreachableInst(Switch->getContext(), NewDefaultBlock); - Switch->setDefaultDest(&*NewDefaultBlock); - if (DTU) { - SmallVector<DominatorTree::UpdateType, 2> Updates; - Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) - Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); - DTU->applyUpdates(Updates); - } -} - BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, BasicBlock *UnwindEdge, DomTreeUpdater *DTU) { diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index a92cb6a313d3..bb719a499a4c 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -623,15 +623,13 @@ bool llvm::UnrollRuntimeLoopRemainder( if (!SE) return false; - // Only unroll loops with a computable trip count, and the trip count needs - // to be an int value (allowing a pointer type is a TODO item). + // Only unroll loops with a computable trip count. // We calculate the backedge count by using getExitCount on the Latch block, // which is proven to be the only exiting block in this loop. This is same as // calculating getBackedgeTakenCount on the loop (which computes SCEV for all // exiting blocks). 
const SCEV *BECountSC = SE->getExitCount(L, Latch); - if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) { + if (isa<SCEVCouldNotCompute>(BECountSC)) { LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 68572d479742..c8e42acdffb3 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1049,6 +1049,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, return Builder.CreateOrReduce(Src); case RecurKind::Xor: return Builder.CreateXorReduce(Src); + case RecurKind::FMulAdd: case RecurKind::FAdd: return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), Src); @@ -1091,7 +1092,8 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, Value *llvm::createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start) { - assert(Desc.getRecurrenceKind() == RecurKind::FAdd && + assert((Desc.getRecurrenceKind() == RecurKind::FAdd || + Desc.getRecurrenceKind() == RecurKind::FMulAdd) && "Unexpected reduction kind"); assert(Src->getType()->isVectorTy() && "Expected a vector type"); assert(!Start->getType()->isVectorTy() && "Expected a scalar type"); diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 5893ce15b129..7d9992176658 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) { // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { + if (!shouldDelete(User)) + continue; + // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp new file mode 100644 index 000000000000..9495e442e0bf --- /dev/null +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -0,0 +1,462 @@ +//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a profile inference algorithm. Given an incomplete and +// possibly imprecise block counts, the algorithm reconstructs realistic block +// and edge counts that satisfy flow conservation rules, while minimally modify +// input block counts. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SampleProfileInference.h" +#include "llvm/Support/Debug.h" +#include <queue> +#include <set> + +using namespace llvm; +#define DEBUG_TYPE "sample-profile-inference" + +namespace { + +/// A value indicating an infinite flow/capacity/weight of a block/edge. +/// Not using numeric_limits<int64_t>::max(), as the values can be summed up +/// during the execution. 
+static constexpr int64_t INF = ((int64_t)1) << 50; + +/// The minimum-cost maximum flow algorithm. +/// +/// The algorithm finds the maximum flow of minimum cost on a given (directed) +/// network using a modified version of the classical Moore-Bellman-Ford +/// approach. The algorithm applies a number of augmentation iterations in which +/// flow is sent along paths of positive capacity from the source to the sink. +/// The worst-case time complexity of the implementation is O(v(f)*m*n), where +/// where m is the number of edges, n is the number of vertices, and v(f) is the +/// value of the maximum flow. However, the observed running time on typical +/// instances is sub-quadratic, that is, o(n^2). +/// +/// The input is a set of edges with specified costs and capacities, and a pair +/// of nodes (source and sink). The output is the flow along each edge of the +/// minimum total cost respecting the given edge capacities. +class MinCostMaxFlow { +public: + // Initialize algorithm's data structures for a network of a given size. + void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { + Source = SourceNode; + Target = SinkNode; + + Nodes = std::vector<Node>(NodeCount); + Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); + } + + // Run the algorithm. + int64_t run() { + // Find an augmenting path and update the flow along the path + size_t AugmentationIters = 0; + while (findAugmentingPath()) { + augmentFlowAlongPath(); + AugmentationIters++; + } + + // Compute the total flow and its cost + int64_t TotalCost = 0; + int64_t TotalFlow = 0; + for (uint64_t Src = 0; Src < Nodes.size(); Src++) { + for (auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) { + TotalCost += Edge.Cost * Edge.Flow; + if (Src == Source) + TotalFlow += Edge.Flow; + } + } + } + LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters + << " iterations with " << TotalFlow << " total flow" + << " of " << TotalCost << " cost\n"); + (void)TotalFlow; + return TotalCost; + } + + /// Adding an edge to the network with a specified capacity and a cost. + /// Multiple edges between a pair of nodes are allowed but self-edges + /// are not supported. + void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { + assert(Capacity > 0 && "adding an edge of zero capacity"); + assert(Src != Dst && "loop edge are not supported"); + + Edge SrcEdge; + SrcEdge.Dst = Dst; + SrcEdge.Cost = Cost; + SrcEdge.Capacity = Capacity; + SrcEdge.Flow = 0; + SrcEdge.RevEdgeIndex = Edges[Dst].size(); + + Edge DstEdge; + DstEdge.Dst = Src; + DstEdge.Cost = -Cost; + DstEdge.Capacity = 0; + DstEdge.Flow = 0; + DstEdge.RevEdgeIndex = Edges[Src].size(); + + Edges[Src].push_back(SrcEdge); + Edges[Dst].push_back(DstEdge); + } + + /// Adding an edge to the network of infinite capacity and a given cost. + void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { + addEdge(Src, Dst, INF, Cost); + } + + /// Get the total flow from a given source node. + /// Returns a list of pairs (target node, amount of flow to the target). + const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { + std::vector<std::pair<uint64_t, int64_t>> Flow; + for (auto &Edge : Edges[Src]) { + if (Edge.Flow > 0) + Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); + } + return Flow; + } + + /// Get the total flow between a pair of nodes. 
+ int64_t getFlow(uint64_t Src, uint64_t Dst) const { + int64_t Flow = 0; + for (auto &Edge : Edges[Src]) { + if (Edge.Dst == Dst) { + Flow += Edge.Flow; + } + } + return Flow; + } + + /// A cost of increasing a block's count by one. + static constexpr int64_t AuxCostInc = 10; + /// A cost of decreasing a block's count by one. + static constexpr int64_t AuxCostDec = 20; + /// A cost of increasing a count of zero-weight block by one. + static constexpr int64_t AuxCostIncZero = 11; + /// A cost of increasing the entry block's count by one. + static constexpr int64_t AuxCostIncEntry = 40; + /// A cost of decreasing the entry block's count by one. + static constexpr int64_t AuxCostDecEntry = 10; + /// A cost of taking an unlikely jump. + static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; + +private: + /// Check for existence of an augmenting path with a positive capacity. + bool findAugmentingPath() { + // Initialize data structures + for (auto &Node : Nodes) { + Node.Distance = INF; + Node.ParentNode = uint64_t(-1); + Node.ParentEdgeIndex = uint64_t(-1); + Node.Taken = false; + } + + std::queue<uint64_t> Queue; + Queue.push(Source); + Nodes[Source].Distance = 0; + Nodes[Source].Taken = true; + while (!Queue.empty()) { + uint64_t Src = Queue.front(); + Queue.pop(); + Nodes[Src].Taken = false; + // Although the residual network contains edges with negative costs + // (in particular, backward edges), it can be shown that there are no + // negative-weight cycles and the following two invariants are maintained: + // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, + // where Dist is the length of the shortest path between two nodes. This + // allows to prune the search-space of the path-finding algorithm using + // the following early-stop criteria: + // -- If we find a path with zero-distance from Source to Target, stop the + // search, as the path is the shortest since Dist[Source, Target] >= 0; + // -- If we have Dist[Source, V] > Dist[Source, Target], then do not + // process node V, as it is guaranteed _not_ to be on a shortest path + // from Source to Target; it follows from inequalities + // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] + // >= Dist[Source, V] + if (Nodes[Target].Distance == 0) + break; + if (Nodes[Src].Distance > Nodes[Target].Distance) + continue; + + // Process adjacent edges + for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { + auto &Edge = Edges[Src][EdgeIdx]; + if (Edge.Flow < Edge.Capacity) { + uint64_t Dst = Edge.Dst; + int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; + if (Nodes[Dst].Distance > NewDistance) { + // Update the distance and the parent node/edge + Nodes[Dst].Distance = NewDistance; + Nodes[Dst].ParentNode = Src; + Nodes[Dst].ParentEdgeIndex = EdgeIdx; + // Add the node to the queue, if it is not there yet + if (!Nodes[Dst].Taken) { + Queue.push(Dst); + Nodes[Dst].Taken = true; + } + } + } + } + } + + return Nodes[Target].Distance != INF; + } + + /// Update the current flow along the augmenting path. 
+ void augmentFlowAlongPath() { + // Find path capacity + int64_t PathCapacity = INF; + uint64_t Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); + Now = Pred; + } + + assert(PathCapacity > 0 && "found incorrect augmenting path"); + + // Update the flow along the path + Now = Target; + while (Now != Source) { + uint64_t Pred = Nodes[Now].ParentNode; + auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; + auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; + + Edge.Flow += PathCapacity; + RevEdge.Flow -= PathCapacity; + + Now = Pred; + } + } + + /// An node in a flow network. + struct Node { + /// The cost of the cheapest path from the source to the current node. + int64_t Distance; + /// The node preceding the current one in the path. + uint64_t ParentNode; + /// The index of the edge between ParentNode and the current node. + uint64_t ParentEdgeIndex; + /// An indicator of whether the current node is in a queue. + bool Taken; + }; + /// An edge in a flow network. + struct Edge { + /// The cost of the edge. + int64_t Cost; + /// The capacity of the edge. + int64_t Capacity; + /// The current flow on the edge. + int64_t Flow; + /// The destination node of the edge. + uint64_t Dst; + /// The index of the reverse edge between Dst and the current node. + uint64_t RevEdgeIndex; + }; + + /// The set of network nodes. + std::vector<Node> Nodes; + /// The set of network edges. + std::vector<std::vector<Edge>> Edges; + /// Source node of the flow. + uint64_t Source; + /// Target (sink) node of the flow. + uint64_t Target; +}; + +/// Initializing flow network for a given function. +/// +/// Every block is split into three nodes that are responsible for (i) an +/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or +/// reduction of the block weight. +void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { + uint64_t NumBlocks = Func.Blocks.size(); + assert(NumBlocks > 1 && "Too few blocks in a function"); + LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); + + // Pre-process data: make sure the entry weight is at least 1 + if (Func.Blocks[Func.Entry].Weight == 0) { + Func.Blocks[Func.Entry].Weight = 1; + } + // Introducing dummy source/sink pairs to allow flow circulation. + // The nodes corresponding to blocks of Func have indicies in the range + // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 
+ uint64_t S = 3 * NumBlocks; + uint64_t T = S + 1; + uint64_t S1 = S + 2; + uint64_t T1 = S + 3; + + Network.initialize(3 * NumBlocks + 4, S1, T1); + + // Create three nodes for every block of the function + for (uint64_t B = 0; B < NumBlocks; B++) { + auto &Block = Func.Blocks[B]; + assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && + "non-zero weight of a block w/o weight except for an entry"); + + // Split every block into two nodes + uint64_t Bin = 3 * B; + uint64_t Bout = 3 * B + 1; + uint64_t Baux = 3 * B + 2; + if (Block.Weight > 0) { + Network.addEdge(S1, Bout, Block.Weight, 0); + Network.addEdge(Bin, T1, Block.Weight, 0); + } + + // Edges from S and to T + assert((!Block.isEntry() || !Block.isExit()) && + "a block cannot be an entry and an exit"); + if (Block.isEntry()) { + Network.addEdge(S, Bin, 0); + } else if (Block.isExit()) { + Network.addEdge(Bout, T, 0); + } + + // An auxiliary node to allow increase/reduction of block counts: + // We assume that decreasing block counts is more expensive than increasing, + // and thus, setting separate costs here. In the future we may want to tune + // the relative costs so as to maximize the quality of generated profiles. + int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; + int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; + if (Block.UnknownWeight) { + // Do not penalize changing weights of blocks w/o known profile count + AuxCostInc = 0; + AuxCostDec = 0; + } else { + // Increasing the count for "cold" blocks with zero initial count is more + // expensive than for "hot" ones + if (Block.Weight == 0) { + AuxCostInc = MinCostMaxFlow::AuxCostIncZero; + } + // Modifying the count of the entry block is expensive + if (Block.isEntry()) { + AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; + AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; + } + } + // For blocks with self-edges, do not penalize a reduction of the count, + // as all of the increase can be attributed to the self-edge + if (Block.HasSelfEdge) { + AuxCostDec = 0; + } + + Network.addEdge(Bin, Baux, AuxCostInc); + Network.addEdge(Baux, Bout, AuxCostInc); + if (Block.Weight > 0) { + Network.addEdge(Bout, Baux, AuxCostDec); + Network.addEdge(Baux, Bin, AuxCostDec); + } + } + + // Creating edges for every jump + for (auto &Jump : Func.Jumps) { + uint64_t Src = Jump.Source; + uint64_t Dst = Jump.Target; + if (Src != Dst) { + uint64_t SrcOut = 3 * Src + 1; + uint64_t DstIn = 3 * Dst; + uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; + Network.addEdge(SrcOut, DstIn, Cost); + } + } + + // Make sure we have a valid flow circulation + Network.addEdge(T, S, 0); +} + +/// Extract resulting block and edge counts from the flow network. 
+void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { + uint64_t NumBlocks = Func.Blocks.size(); + + // Extract resulting block counts + for (uint64_t Src = 0; Src < NumBlocks; Src++) { + auto &Block = Func.Blocks[Src]; + uint64_t SrcOut = 3 * Src + 1; + int64_t Flow = 0; + for (auto &Adj : Network.getFlow(SrcOut)) { + uint64_t DstIn = Adj.first; + int64_t DstFlow = Adj.second; + bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); + if (!IsAuxNode || Block.HasSelfEdge) { + Flow += DstFlow; + } + } + Block.Flow = Flow; + assert(Flow >= 0 && "negative block flow"); + } + + // Extract resulting jump counts + for (auto &Jump : Func.Jumps) { + uint64_t Src = Jump.Source; + uint64_t Dst = Jump.Target; + int64_t Flow = 0; + if (Src != Dst) { + uint64_t SrcOut = 3 * Src + 1; + uint64_t DstIn = 3 * Dst; + Flow = Network.getFlow(SrcOut, DstIn); + } else { + uint64_t SrcOut = 3 * Src + 1; + uint64_t SrcAux = 3 * Src + 2; + int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); + if (AuxFlow > 0) + Flow = AuxFlow; + } + Jump.Flow = Flow; + assert(Flow >= 0 && "negative jump flow"); + } +} + +#ifndef NDEBUG +/// Verify that the computed flow values satisfy flow conservation rules +void verifyWeights(const FlowFunction &Func) { + const uint64_t NumBlocks = Func.Blocks.size(); + auto InFlow = std::vector<uint64_t>(NumBlocks, 0); + auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); + for (auto &Jump : Func.Jumps) { + InFlow[Jump.Target] += Jump.Flow; + OutFlow[Jump.Source] += Jump.Flow; + } + + uint64_t TotalInFlow = 0; + uint64_t TotalOutFlow = 0; + for (uint64_t I = 0; I < NumBlocks; I++) { + auto &Block = Func.Blocks[I]; + if (Block.isEntry()) { + TotalInFlow += Block.Flow; + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + } else if (Block.isExit()) { + TotalOutFlow += Block.Flow; + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } else { + assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); + assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); + } + } + assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); +} +#endif + +} // end of anonymous namespace + +/// Apply the profile inference algorithm for a given flow function +void llvm::applyFlowInference(FlowFunction &Func) { + // Create and apply an inference network model + auto InferenceNetwork = MinCostMaxFlow(); + initializeNetwork(InferenceNetwork, Func); + InferenceNetwork.run(); + + // Extract flow values for every block and every edge + extractWeights(InferenceNetwork, Func); + +#ifndef NDEBUG + // Verify the result + verifyWeights(Func); +#endif +} diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp index 6d995cf4c048..ea0e8343eb88 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp @@ -34,6 +34,10 @@ cl::opt<bool> NoWarnSampleUnused( cl::desc("Use this option to turn off/on warnings about function with " "samples but without debug information to use those samples. ")); +cl::opt<bool> SampleProfileUseProfi( + "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Use profi to infer block and edge counts.")); + namespace sampleprofutil { /// Return true if the given callsite is hot wrt to hot cutoff threshold. 
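The new SampleProfileInference.cpp above encodes each basic block as three network nodes and appends four dummy source/sink nodes after them. The short, self-contained C++ sketch below is illustrative only and is not part of the patch (helper names such as blockNodes are hypothetical); it simply reproduces the index arithmetic described in initializeNetwork(): block B owns nodes 3*B (in), 3*B+1 (out), 3*B+2 (aux), and a CFG jump Src -> Dst becomes a network edge from 3*Src+1 to 3*Dst.

#include <cstdint>
#include <iostream>

// Illustrative only: mirrors the node numbering described in the comments of
// initializeNetwork(). Block B owns three nodes (in/out/aux); the four dummy
// nodes S, T, S1, T1 follow immediately after the last block's nodes.
struct BlockNodes {
  uint64_t In, Out, Aux;
};

static BlockNodes blockNodes(uint64_t B) {
  return {3 * B, 3 * B + 1, 3 * B + 2};
}

int main() {
  const uint64_t NumBlocks = 3; // a hypothetical 3-block function
  const uint64_t S = 3 * NumBlocks, T = S + 1, S1 = S + 2, T1 = S + 3;

  for (uint64_t B = 0; B < NumBlocks; ++B) {
    BlockNodes N = blockNodes(B);
    std::cout << "block " << B << ": in=" << N.In << " out=" << N.Out
              << " aux=" << N.Aux << "\n";
  }
  std::cout << "dummy nodes: S=" << S << " T=" << T << " S1=" << S1
            << " T1=" << T1 << "\n";

  // A CFG jump Src -> Dst (Src != Dst) becomes a network edge from the
  // source block's "out" node to the target block's "in" node.
  uint64_t Src = 0, Dst = 2;
  std::cout << "jump 0->2 maps to edge " << blockNodes(Src).Out << " -> "
            << blockNodes(Dst).In << "\n";
  return 0;
}

With NumBlocks = 3, for instance, the dummy nodes come out as S = 9, T = 10, S1 = 11, T1 = 12, which matches the 3 * NumBlocks + 4 network size passed to Network.initialize(3 * NumBlocks + 4, S1, T1) in the patch.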
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index a042146d7ace..71c15d5c51fc 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" @@ -1833,22 +1834,6 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { return V; } -/// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursively -/// but this is better than nothing. -static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) { - if (isa<OverflowingBinaryOperator>(I)) { - if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { - if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) - return true; - if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) - return true; - } - } else if (isa<PossiblyExactOperator>(I) && I->isExact()) - return true; - return false; -} - ScalarEvolution::ValueOffsetPair SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { @@ -1872,8 +1857,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S, if (S->getType() == V->getType() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) && - !SCEVLostPoisonFlags(S, EntInst)) + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) return {V, Offset}; } } @@ -1952,26 +1936,36 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!V) V = visit(S); - else if (VO.second) { - if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { - Type *Ety = Vty->getPointerElementType(); - int64_t Offset = VO.second->getSExtValue(); - int64_t ESize = SE.getTypeSizeInBits(Ety); - if ((Offset * 8) % ESize == 0) { - ConstantInt *Idx = + else { + // If we're reusing an existing instruction, we are effectively CSEing two + // copies of the instruction (with potentially different flags). As such, + // we need to drop any poison generating flags unless we can prove that + // said flags must be valid for all new users. 
+ if (auto *I = dyn_cast<Instruction>(V)) + if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) + I->dropPoisonGeneratingFlags(); + + if (VO.second) { + if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) { + Type *Ety = Vty->getPointerElementType(); + int64_t Offset = VO.second->getSExtValue(); + int64_t ESize = SE.getTypeSizeInBits(Ety); + if ((Offset * 8) % ESize == 0) { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize); - V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); - } else { - ConstantInt *Idx = + V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); + } else { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); + unsigned AS = Vty->getAddressSpace(); + V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); + V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, + "uglygep"); + V = Builder.CreateBitCast(V, Vty); + } + } else { + V = Builder.CreateSub(V, VO.second); } - } else { - V = Builder.CreateSub(V, VO.second); } } // Remember the expanded value for this SCEV at this location. @@ -2180,7 +2174,9 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, } // Use expand's logic which is used for reusing a previous Value in - // ExprValueMap. + // ExprValueMap. Note that we don't currently model the cost of + // needing to drop poison generating flags on the instruction if we + // want to reuse it. We effectively assume that has zero cost. ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); if (VO.first) return VO; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f467de5f924e..afa3ecde77f9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3936,7 +3936,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - SmallPtrSet<BasicBlock *, 2> RemovedSuccessors; + SmallSetVector<BasicBlock *, 2> RemovedSuccessors; // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { @@ -4782,6 +4782,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { return true; } +static void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + auto *BB = Switch->getParent(); + auto *OrigDefaultBlock = Switch->getDefaultDest(); + OrigDefaultBlock->removePredecessor(BB); + BasicBlock *NewDefaultBlock = BasicBlock::Create( + BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), + OrigDefaultBlock); + new UnreachableInst(Switch->getContext(), NewDefaultBlock); + Switch->setDefaultDest(&*NewDefaultBlock); + if (DTU) { + SmallVector<DominatorTree::UpdateType, 2> Updates; + Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); + if (!is_contained(successors(BB), OrigDefaultBlock)) + Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); + DTU->applyUpdates(Updates); + } +} + /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. 
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, @@ -4927,10 +4947,14 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, // Gather dead cases. SmallVector<ConstantInt *, 8> DeadCases; SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases; + SmallVector<BasicBlock *, 8> UniqueSuccessors; for (auto &Case : SI->cases()) { auto *Successor = Case.getCaseSuccessor(); - if (DTU) + if (DTU) { + if (!NumPerSuccessorCases.count(Successor)) + UniqueSuccessors.push_back(Successor); ++NumPerSuccessorCases[Successor]; + } const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { @@ -4973,9 +4997,9 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, if (DTU) { std::vector<DominatorTree::UpdateType> Updates; - for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + for (auto *Successor : UniqueSuccessors) + if (NumPerSuccessorCases[Successor] == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor}); DTU->applyUpdates(Updates); } @@ -6040,15 +6064,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (Succ == SI->getDefaultDest()) continue; Succ->removePredecessor(BB); - RemovedSuccessors.insert(Succ); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } SI->eraseFromParent(); - if (DTU) { - for (BasicBlock *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + if (DTU) DTU->applyUpdates(Updates); - } ++NumLookupTables; if (NeedMask) @@ -6215,7 +6237,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; - SmallPtrSet<BasicBlock *, 8> RemovedSuccs; + SmallSetVector<BasicBlock *, 8> RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { @@ -6305,8 +6327,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. 
- SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); - for (BasicBlock *Pred : Preds) { + SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : UniquePreds) { InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); @@ -6323,8 +6345,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, if (isa<DbgInfoIntrinsic>(Inst)) Inst.eraseFromParent(); - SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB)); - for (BasicBlock *Succ : Succs) { + SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : UniqueSuccs) { Succ->removePredecessor(BB); if (DTU) Updates.push_back({DominatorTree::Delete, BB, Succ}); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 23bb6f0860c9..5ca0adb4242c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -473,18 +473,10 @@ public: /// handle the more complex control flow around the loops. virtual BasicBlock *createVectorizedLoopSkeleton(); - /// Widen a single instruction within the innermost loop. - void widenInstruction(Instruction &I, VPValue *Def, VPUser &Operands, - VPTransformState &State); - /// Widen a single call instruction within the innermost loop. void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands, VPTransformState &State); - /// Widen a single select instruction within the innermost loop. - void widenSelectInstruction(SelectInst &I, VPValue *VPDef, VPUser &Operands, - bool InvariantCond, VPTransformState &State); - /// Fix the vectorized code, taking care of header phi's, live-outs, and more. void fixVectorizedLoop(VPTransformState &State); @@ -496,12 +488,6 @@ public: /// new unrolled loop, where UF is the unroll factor. using VectorParts = SmallVector<Value *, 2>; - /// Vectorize a single GetElementPtrInst based on information gathered and - /// decisions taken during planning. - void widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, VPUser &Indices, - unsigned UF, ElementCount VF, bool IsPtrLoopInvariant, - SmallBitVector &IsIndexLoopInvariant, VPTransformState &State); - /// Vectorize a single first-order recurrence or pointer induction PHINode in /// a block. This method handles the induction variable canonicalization. It /// supports both VF = 1 for unrolled loops and arbitrary length vectors. @@ -511,9 +497,9 @@ public: /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, - /// inclusive. Uses the VPValue operands from \p Operands instead of \p + /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p /// Instr's operands. - void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands, + void scalarizeInstruction(Instruction *Instr, VPReplicateRecipe *RepRecipe, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); @@ -538,15 +524,6 @@ public: ArrayRef<VPValue *> StoredValues, VPValue *BlockInMask = nullptr); - /// Vectorize Load and Store instructions with the base address given in \p - /// Addr, optionally masking the vector operations if \p BlockInMask is - /// non-null. 
Use \p State to translate given VPValues to IR values in the - /// vectorized loop. - void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, - VPValue *Def, VPValue *Addr, - VPValue *StoredValue, VPValue *BlockInMask, - bool ConsecutiveStride, bool Reverse); - /// Set the debug location in the builder \p Ptr using the debug location in /// \p V. If \p Ptr is None then it uses the class member's Builder. void setDebugLocFromInst(const Value *V, @@ -566,6 +543,17 @@ public: /// element. virtual Value *getBroadcastInstrs(Value *V); + /// Add metadata from one instruction to another. + /// + /// This includes both the original MDs from \p From and additional ones (\see + /// addNewMetadata). Use this for *newly created* instructions in the vector + /// loop. + void addMetadata(Instruction *To, Instruction *From); + + /// Similar to the previous function but it adds the metadata to a + /// vector of instructions. + void addMetadata(ArrayRef<Value *> To, Instruction *From); + protected: friend class LoopVectorizationPlanner; @@ -741,16 +729,16 @@ protected: /// vector loop. void addNewMetadata(Instruction *To, const Instruction *Orig); - /// Add metadata from one instruction to another. - /// - /// This includes both the original MDs from \p From and additional ones (\see - /// addNewMetadata). Use this for *newly created* instructions in the vector - /// loop. - void addMetadata(Instruction *To, Instruction *From); - - /// Similar to the previous function but it adds the metadata to a - /// vector of instructions. - void addMetadata(ArrayRef<Value *> To, Instruction *From); + /// Collect poison-generating recipes that may generate a poison value that is + /// used after vectorization, even when their operands are not poison. Those + /// recipes meet the following conditions: + /// * Contribute to the address computation of a recipe generating a widen + /// memory load/store (VPWidenMemoryInstructionRecipe or + /// VPInterleaveRecipe). + /// * Such a widen memory load/store has at least one underlying Instruction + /// that is in a basic block that needs predication and after vectorization + /// the generated instruction won't be predicated. + void collectPoisonGeneratingRecipes(VPTransformState &State); /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. @@ -1173,6 +1161,84 @@ void InnerLoopVectorizer::addNewMetadata(Instruction *To, LVer->annotateInstWithNoAlias(To, Orig); } +void InnerLoopVectorizer::collectPoisonGeneratingRecipes( + VPTransformState &State) { + + // Collect recipes in the backward slice of `Root` that may generate a poison + // value that is used after vectorization. + SmallPtrSet<VPRecipeBase *, 16> Visited; + auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) { + SmallVector<VPRecipeBase *, 16> Worklist; + Worklist.push_back(Root); + + // Traverse the backward slice of Root through its use-def chain. + while (!Worklist.empty()) { + VPRecipeBase *CurRec = Worklist.back(); + Worklist.pop_back(); + + if (!Visited.insert(CurRec).second) + continue; + + // Prune search if we find another recipe generating a widen memory + // instruction. Widen memory instructions involved in address computation + // will lead to gather/scatter instructions, which don't need to be + // handled. + if (isa<VPWidenMemoryInstructionRecipe>(CurRec) || + isa<VPInterleaveRecipe>(CurRec)) + continue; + + // This recipe contributes to the address computation of a widen + // load/store. 
Collect recipe if its underlying instruction has + // poison-generating flags. + Instruction *Instr = CurRec->getUnderlyingInstr(); + if (Instr && Instr->hasPoisonGeneratingFlags()) + State.MayGeneratePoisonRecipes.insert(CurRec); + + // Add new definitions to the worklist. + for (VPValue *operand : CurRec->operands()) + if (VPDef *OpDef = operand->getDef()) + Worklist.push_back(cast<VPRecipeBase>(OpDef)); + } + }); + + // Traverse all the recipes in the VPlan and collect the poison-generating + // recipes in the backward slice starting at the address of a VPWidenRecipe or + // VPInterleaveRecipe. + auto Iter = depth_first( + VPBlockRecursiveTraversalWrapper<VPBlockBase *>(State.Plan->getEntry())); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { + for (VPRecipeBase &Recipe : *VPBB) { + if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) { + Instruction *UnderlyingInstr = WidenRec->getUnderlyingInstr(); + VPDef *AddrDef = WidenRec->getAddr()->getDef(); + if (AddrDef && WidenRec->isConsecutive() && UnderlyingInstr && + Legal->blockNeedsPredication(UnderlyingInstr->getParent())) + collectPoisonGeneratingInstrsInBackwardSlice( + cast<VPRecipeBase>(AddrDef)); + } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) { + VPDef *AddrDef = InterleaveRec->getAddr()->getDef(); + if (AddrDef) { + // Check if any member of the interleave group needs predication. + const InterleaveGroup<Instruction> *InterGroup = + InterleaveRec->getInterleaveGroup(); + bool NeedPredication = false; + for (int I = 0, NumMembers = InterGroup->getNumMembers(); + I < NumMembers; ++I) { + Instruction *Member = InterGroup->getMember(I); + if (Member) + NeedPredication |= + Legal->blockNeedsPredication(Member->getParent()); + } + + if (NeedPredication) + collectPoisonGeneratingInstrsInBackwardSlice( + cast<VPRecipeBase>(AddrDef)); + } + } + } + } +} + void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { propagateMetadata(To, From); @@ -1541,7 +1607,16 @@ public: // Returns true if \p I is an instruction that will be predicated either // through scalar predication or masked load/store or masked gather/scatter. // Superset of instructions that return true for isScalarWithPredication. - bool isPredicatedInst(Instruction *I) { + bool isPredicatedInst(Instruction *I, bool IsKnownUniform = false) { + // When we know the load is uniform and the original scalar loop was not + // predicated we don't need to mark it as a predicated instruction. Any + // vectorised blocks created when tail-folding are something artificial we + // have introduced and we know there is always at least one active lane. + // That's why we call Legal->blockNeedsPredication here because it doesn't + // query tail-folding. + if (IsKnownUniform && isa<LoadInst>(I) && + !Legal->blockNeedsPredication(I->getParent())) + return false; if (!blockNeedsPredicationForAnyReason(I->getParent())) return false; // Loads and stores that need some form of masked operation are predicated @@ -1816,9 +1891,11 @@ private: /// Collect the instructions that are scalar after vectorization. An /// instruction is scalar if it is known to be uniform or will be scalarized - /// during vectorization. Non-uniform scalarized instructions will be - /// represented by VF values in the vectorized loop, each corresponding to an - /// iteration of the original scalar loop. + /// during vectorization. 
collectLoopScalars should only add non-uniform nodes + /// to the list if they are used by a load/store instruction that is marked as + /// CM_Scalarize. Non-uniform scalarized instructions will be represented by + /// VF values in the vectorized loop, each corresponding to an iteration of + /// the original scalar loop. void collectLoopScalars(ElementCount VF); /// Keeps cost model vectorization decision and cost for instructions. @@ -2918,132 +2995,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( } } -void InnerLoopVectorizer::vectorizeMemoryInstruction( - Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr, - VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride, - bool Reverse) { - // Attempt to issue a wide load. - LoadInst *LI = dyn_cast<LoadInst>(Instr); - StoreInst *SI = dyn_cast<StoreInst>(Instr); - - assert((LI || SI) && "Invalid Load/Store instruction"); - assert((!SI || StoredValue) && "No stored value provided for widened store"); - assert((!LI || !StoredValue) && "Stored value provided for widened load"); - - Type *ScalarDataTy = getLoadStoreType(Instr); - - auto *DataTy = VectorType::get(ScalarDataTy, VF); - const Align Alignment = getLoadStoreAlignment(Instr); - bool CreateGatherScatter = !ConsecutiveStride; - - VectorParts BlockInMaskParts(UF); - bool isMaskRequired = BlockInMask; - if (isMaskRequired) - for (unsigned Part = 0; Part < UF; ++Part) - BlockInMaskParts[Part] = State.get(BlockInMask, Part); - - const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { - // Calculate the pointer for the specific unroll-part. - GetElementPtrInst *PartPtr = nullptr; - - bool InBounds = false; - if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) - InBounds = gep->isInBounds(); - if (Reverse) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF); - // NumElt = -Part * RunTimeVF - Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF); - PartPtr = - cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt)); - PartPtr->setIsInBounds(InBounds); - PartPtr = cast<GetElementPtrInst>( - Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane)); - PartPtr->setIsInBounds(InBounds); - if (isMaskRequired) // Reverse of a null all-one mask is a null mask. - BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]); - } else { - Value *Increment = - createStepForVF(Builder, Builder.getInt32Ty(), VF, Part); - PartPtr = cast<GetElementPtrInst>( - Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); - PartPtr->setIsInBounds(InBounds); - } - - unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); - return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); - }; - - // Handle Stores: - if (SI) { - setDebugLocFromInst(SI); - - for (unsigned Part = 0; Part < UF; ++Part) { - Instruction *NewSI = nullptr; - Value *StoredVal = State.get(StoredValue, Part); - if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? 
BlockInMaskParts[Part] : nullptr; - Value *VectorGep = State.get(Addr, Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); - } else { - if (Reverse) { - // If we store to reverse consecutive memory locations, then we need - // to reverse the order of elements in the stored value. - StoredVal = reverseVector(StoredVal); - // We don't want to update the value in the map as it might be used in - // another expression. So don't call resetVectorValue(StoredVal). - } - auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0))); - if (isMaskRequired) - NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, - BlockInMaskParts[Part]); - else - NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); - } - addMetadata(NewSI, SI); - } - return; - } - - // Handle loads. - assert(LI && "Must have a load instruction"); - setDebugLocFromInst(LI); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *NewLI; - if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; - Value *VectorGep = State.get(Addr, Part); - NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, - nullptr, "wide.masked.gather"); - addMetadata(NewLI, LI); - } else { - auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0))); - if (isMaskRequired) - NewLI = Builder.CreateMaskedLoad( - DataTy, VecPtr, Alignment, BlockInMaskParts[Part], - PoisonValue::get(DataTy), "wide.masked.load"); - else - NewLI = - Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); - - // Add metadata to the load, but setVectorValue to the reverse shuffle. - addMetadata(NewLI, LI); - if (Reverse) - NewLI = reverseVector(NewLI); - } - - State.set(Def, NewLI, Part); - } -} - -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, - VPUser &User, +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, + VPReplicateRecipe *RepRecipe, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State) { @@ -3064,17 +3017,26 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); + // If the scalarized instruction contributes to the address computation of a + // widen masked load/store which was in a basic block that needed predication + // and is not predicated after vectorization, we can't propagate + // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized + // instruction could feed a poison value to the base address of the widen + // load/store. + if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0) + Cloned->dropPoisonGeneratingFlags(); + State.Builder.SetInsertPoint(Builder.GetInsertBlock(), Builder.GetInsertPoint()); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. 
- for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) { + for (unsigned op = 0, e = RepRecipe->getNumOperands(); op != e; ++op) { auto *Operand = dyn_cast<Instruction>(Instr->getOperand(op)); auto InputInstance = Instance; if (!Operand || !OrigLoop->contains(Operand) || (Cost->isUniformAfterVectorization(Operand, State.VF))) InputInstance.Lane = VPLane::getFirstLane(); - auto *NewOp = State.get(User.getOperand(op), InputInstance); + auto *NewOp = State.get(RepRecipe->getOperand(op), InputInstance); Cloned->setOperand(op, NewOp); } addNewMetadata(Cloned, Instr); @@ -3082,7 +3044,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, // Place the cloned scalar in the new loop. Builder.Insert(Cloned); - State.set(Def, Cloned, Instance); + State.set(RepRecipe, Cloned, Instance); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast<AssumeInst>(Cloned)) @@ -4615,77 +4577,6 @@ bool InnerLoopVectorizer::useOrderedReductions(RecurrenceDescriptor &RdxDesc) { return Cost->useOrderedReductions(RdxDesc); } -void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, - VPUser &Operands, unsigned UF, - ElementCount VF, bool IsPtrLoopInvariant, - SmallBitVector &IsIndexLoopInvariant, - VPTransformState &State) { - // Construct a vector GEP by widening the operands of the scalar GEP as - // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP - // results in a vector of pointers when at least one operand of the GEP - // is vector-typed. Thus, to keep the representation compact, we only use - // vector-typed operands for loop-varying values. - - if (VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - auto *Clone = Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *EntryPart = Builder.CreateVectorSplat(VF, Clone); - State.set(VPDef, EntryPart, Part); - addMetadata(EntryPart, GEP); - } - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers. But if we are only unrolling, we want - // to produce a scalar GEP for each unroll part. Thus, the GEP we - // produce with the code below will be scalar (if VF == 1) or vector - // (otherwise). Note that for the unroll-only case, we still maintain - // values in the vector mapping with initVector, as we do for other - // instructions. - for (unsigned Part = 0; Part < UF; ++Part) { - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = IsPtrLoopInvariant - ? State.get(Operands.getOperand(0), VPIteration(0, 0)) - : State.get(Operands.getOperand(0), Part); - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. 
- SmallVector<Value *, 4> Indices; - for (unsigned I = 1, E = Operands.getNumOperands(); I < E; I++) { - VPValue *Operand = Operands.getOperand(I); - if (IsIndexLoopInvariant[I - 1]) - Indices.push_back(State.get(Operand, VPIteration(0, 0))); - else - Indices.push_back(State.get(Operand, Part)); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = - GEP->isInBounds() - ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr, - Indices) - : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices); - assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - State.set(VPDef, NewGEP, Part); - addMetadata(NewGEP, GEP); - } - } -} - void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, VPWidenPHIRecipe *PhiR, VPTransformState &State) { @@ -4745,38 +4636,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF); - unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue(); - - bool NeedsVectorIndex = !IsUniform && VF.isScalable(); - Value *UnitStepVec = nullptr, *PtrIndSplat = nullptr; - if (NeedsVectorIndex) { - Type *VecIVTy = VectorType::get(PtrInd->getType(), VF); - UnitStepVec = Builder.CreateStepVector(VecIVTy); - PtrIndSplat = Builder.CreateVectorSplat(VF, PtrInd); - } + assert((IsUniform || !State.VF.isScalable()) && + "Cannot scalarize a scalable VF"); + unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); for (unsigned Part = 0; Part < UF; ++Part) { Value *PartStart = createStepForVF(Builder, PtrInd->getType(), VF, Part); - if (NeedsVectorIndex) { - // Here we cache the whole vector, which means we can support the - // extraction of any lane. However, in some cases the extractelement - // instruction that is generated for scalar uses of this vector (e.g. - // a load instruction) is not folded away. Therefore we still - // calculate values for the first n lanes to avoid redundant moves - // (when extracting the 0th element) and to produce scalar code (i.e. - // additional add/gep instructions instead of expensive extractelement - // instructions) when extracting higher-order elements. 
- Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart); - Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec); - Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices); - Value *SclrGep = - emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II); - SclrGep->setName("next.gep"); - State.set(PhiR, SclrGep, Part); - } - for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Value *Idx = Builder.CreateAdd( PartStart, ConstantInt::get(PtrInd->getType(), Lane)); @@ -4858,114 +4725,6 @@ static bool mayDivideByZero(Instruction &I) { return !CInt || CInt->isZero(); } -void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def, - VPUser &User, - VPTransformState &State) { - switch (I.getOpcode()) { - case Instruction::Call: - case Instruction::Br: - case Instruction::PHI: - case Instruction::GetElementPtr: - case Instruction::Select: - llvm_unreachable("This instruction is handled by a different recipe."); - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::FNeg: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen unops and binops. - setDebugLocFromInst(&I); - - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector<Value *, 2> Ops; - for (VPValue *VPOp : User.operands()) - Ops.push_back(State.get(VPOp, Part)); - - Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); - - if (auto *VecOp = dyn_cast<Instruction>(V)) - VecOp->copyIRFlags(&I); - - // Use this vector value for all users of the original instruction. - State.set(Def, V, Part); - addMetadata(V, &I); - } - - break; - } - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = cast<CmpInst>(&I); - setDebugLocFromInst(Cmp); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = State.get(User.getOperand(0), Part); - Value *B = State.get(User.getOperand(1), Part); - Value *C = nullptr; - if (FCmp) { - // Propagate fast math flags. - IRBuilder<>::FastMathFlagGuard FMFG(Builder); - Builder.setFastMathFlags(Cmp->getFastMathFlags()); - C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); - } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A, B); - } - State.set(Def, C, Part); - addMetadata(C, &I); - } - - break; - } - - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = cast<CastInst>(&I); - setDebugLocFromInst(CI); - - /// Vectorize casts. - Type *DestTy = - (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF); - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = State.get(User.getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - State.set(Def, Cast, Part); - addMetadata(Cast, &I); - } - break; - } - default: - // This instruction is not vectorized by simple widening. 
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); - llvm_unreachable("Unhandled instruction!"); - } // end of switch. -} - void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands, VPTransformState &State) { @@ -5039,31 +4798,6 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def, } } -void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef, - VPUser &Operands, - bool InvariantCond, - VPTransformState &State) { - setDebugLocFromInst(&I); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - auto *InvarCond = InvariantCond - ? State.get(Operands.getOperand(0), VPIteration(0, 0)) - : nullptr; - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *Cond = - InvarCond ? InvarCond : State.get(Operands.getOperand(0), Part); - Value *Op0 = State.get(Operands.getOperand(1), Part); - Value *Op1 = State.get(Operands.getOperand(2), Part); - Value *Sel = Builder.CreateSelect(Cond, Op0, Op1); - State.set(VPDef, Sel, Part); - addMetadata(Sel, &I); - } -} - void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does @@ -5103,38 +4837,11 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { !TheLoop->isLoopInvariant(V); }; - auto isScalarPtrInduction = [&](Instruction *MemAccess, Value *Ptr) { - if (!isa<PHINode>(Ptr) || - !Legal->getInductionVars().count(cast<PHINode>(Ptr))) - return false; - auto &Induction = Legal->getInductionVars()[cast<PHINode>(Ptr)]; - if (Induction.getKind() != InductionDescriptor::IK_PtrInduction) - return false; - return isScalarUse(MemAccess, Ptr); - }; - - // A helper that evaluates a memory access's use of a pointer. If the - // pointer is actually the pointer induction of a loop, it is being - // inserted into Worklist. If the use will be a scalar use, and the - // pointer is only used by memory accesses, we place the pointer in - // ScalarPtrs. Otherwise, the pointer is placed in PossibleNonScalarPtrs. + // A helper that evaluates a memory access's use of a pointer. If the use will + // be a scalar use and the pointer is only used by memory accesses, we place + // the pointer in ScalarPtrs. Otherwise, the pointer is placed in + // PossibleNonScalarPtrs. auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { - if (isScalarPtrInduction(MemAccess, Ptr)) { - Worklist.insert(cast<Instruction>(Ptr)); - LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Ptr - << "\n"); - - Instruction *Update = cast<Instruction>( - cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch)); - - // If there is more than one user of Update (Ptr), we shouldn't assume it - // will be scalar after vectorisation as other users of the instruction - // may require widening. Otherwise, add it to ScalarPtrs. - if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) { - ScalarPtrs.insert(Update); - return; - } - } // We only care about bitcast and getelementptr instructions contained in // the loop. 
if (!isLoopVaryingBitCastOrGEP(Ptr)) @@ -5226,11 +4933,22 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { if (Ind == Legal->getPrimaryInduction() && foldTailByMasking()) continue; + // Returns true if \p Indvar is a pointer induction that is used directly by + // load/store instruction \p I. + auto IsDirectLoadStoreFromPtrIndvar = [&](Instruction *Indvar, + Instruction *I) { + return Induction.second.getKind() == + InductionDescriptor::IK_PtrInduction && + (isa<LoadInst>(I) || isa<StoreInst>(I)) && + Indvar == getLoadStorePointerOperand(I) && isScalarUse(I, Indvar); + }; + // Determine if all users of the induction variable are scalar after // vectorization. auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast<Instruction>(U); - return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I); + return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) || + IsDirectLoadStoreFromPtrIndvar(Ind, I); }); if (!ScalarInd) continue; @@ -5240,7 +4958,8 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { auto ScalarIndUpdate = llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { auto *I = cast<Instruction>(U); - return I == Ind || !TheLoop->contains(I) || Worklist.count(I); + return I == Ind || !TheLoop->contains(I) || Worklist.count(I) || + IsDirectLoadStoreFromPtrIndvar(IndUpdate, I); }); if (!ScalarIndUpdate) continue; @@ -7079,6 +6798,8 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, unsigned AS = getLoadStoreAddressSpace(I); Value *Ptr = getLoadStorePointerOperand(I); Type *PtrTy = ToVectorTy(Ptr->getType(), VF); + // NOTE: PtrTy is a vector to signal `TTI::getAddressComputationCost` + // that it is being called from this specific place. // Figure out whether the access is strided and get the stride value // if it's known in compile time @@ -7286,6 +7007,12 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost( InstructionCost BaseCost = TTI.getArithmeticReductionCost( RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind); + // For a call to the llvm.fmuladd intrinsic we need to add the cost of a + // normal fmul instruction to the cost of the fadd reduction. + if (RdxDesc.getRecurrenceKind() == RecurKind::FMulAdd) + BaseCost += + TTI.getArithmeticInstrCost(Instruction::FMul, VectorTy, CostKind); + // If we're using ordered reductions then we can just return the base cost // here, since getArithmeticReductionCost calculates the full ordered // reduction cost when FP reassociation is not allowed. 
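// Illustrative sketch only, not part of the imported patch: why the fmuladd
// reduction cost above is "fadd reduction plus one vector fmul". Since
// llvm.fmuladd(a, b, acc) computes a * b + acc, the planner later splits an
// in-loop fmuladd reduction into an FMul recipe feeding the fadd reduction
// (see the FMulRecipe added further down in this diff), so the cost model
// simply sums the two. A plain C++ mock of that accounting, with invented
// names; the real code uses TTI.getArithmeticReductionCost and
// TTI.getArithmeticInstrCost.
#include <cassert>

struct MockCost { unsigned Value; };

static MockCost fmulAddReductionCost(MockCost FAddReductionCost,
                                     MockCost VectorFMulCost) {
  return {FAddReductionCost.Value + VectorFMulCost.Value};
}

int main() {
  // E.g. an fadd reduction costed at 8 plus a vector fmul costed at 2.
  assert(fmulAddReductionCost({8}, {2}).Value == 10);
  return 0;
}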
@@ -7962,6 +7689,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I); } case Instruction::Call: { + if (RecurrenceDescriptor::isFMulAddIntrinsic(I)) + if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind)) + return *RedCost; bool NeedToScalarize; CallInst *CI = cast<CallInst>(I); InstructionCost CallCost = getVectorCallCost(CI, VF, NeedToScalarize); @@ -8260,6 +7990,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF, State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); State.CanonicalIV = ILV.Induction; + ILV.collectPoisonGeneratingRecipes(State); ILV.printDebugTracesAtStart(); @@ -8468,7 +8199,8 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtStart() { void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() { DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "intermediate fn:\n" << *Induction->getFunction() << "\n"; + dbgs() << "intermediate fn:\n" + << *OrigLoop->getHeader()->getParent() << "\n"; }); } @@ -8666,7 +8398,7 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() { void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() { DEBUG_WITH_TYPE(VerboseDebug, { - dbgs() << "final fn:\n" << *Induction->getFunction() << "\n"; + dbgs() << "final fn:\n" << *OrigLoop->getHeader()->getParent() << "\n"; }); } @@ -9052,7 +8784,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( Range); bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range); + [&](ElementCount VF) { return CM.isPredicatedInst(I, IsUniform); }, + Range); // Even if the instruction is not marked as uniform, there are certain // intrinsic calls that can be effectively treated as such, so we check for @@ -9354,7 +9087,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( if (VPBB) VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); else { - Plan->setEntry(FirstVPBBForBB); + auto *TopRegion = new VPRegionBlock("vector loop"); + TopRegion->setEntry(FirstVPBBForBB); + Plan->setEntry(TopRegion); HeaderVPBB = FirstVPBBForBB; } VPBB = FirstVPBBForBB; @@ -9426,9 +9161,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( } } - assert(isa<VPBasicBlock>(Plan->getEntry()) && + assert(isa<VPRegionBlock>(Plan->getEntry()) && !Plan->getEntry()->getEntryBasicBlock()->empty() && - "entry block must be set to a non-empty VPBasicBlock"); + "entry block must be set to a VPRegionBlock having a non-empty entry " + "VPBasicBlock"); + cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB); RecipeBuilder.fixHeaderPhis(); // --------------------------------------------------------------------------- @@ -9653,12 +9390,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( unsigned FirstOpId; assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && "Only min/max recurrences allowed for inloop reductions"); + // Recognize a call to the llvm.fmuladd intrinsic. 
+ bool IsFMulAdd = (Kind == RecurKind::FMulAdd); + assert((!IsFMulAdd || RecurrenceDescriptor::isFMulAddIntrinsic(R)) && + "Expected instruction to be a call to the llvm.fmuladd intrinsic"); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { assert(isa<VPWidenSelectRecipe>(WidenRecipe) && "Expected to replace a VPWidenSelectSC"); FirstOpId = 1; } else { - assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe)) && + assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe) || + (IsFMulAdd && isa<VPWidenCallRecipe>(WidenRecipe))) && "Expected to replace a VPWidenSC"); FirstOpId = 0; } @@ -9669,8 +9411,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( auto *CondOp = CM.foldTailByMasking() ? RecipeBuilder.createBlockInMask(R->getParent(), Plan) : nullptr; - VPReductionRecipe *RedRecipe = new VPReductionRecipe( - &RdxDesc, R, ChainOp, VecOp, CondOp, TTI); + + if (IsFMulAdd) { + // If the instruction is a call to the llvm.fmuladd intrinsic then we + // need to create an fmul recipe to use as the vector operand for the + // fadd reduction. + VPInstruction *FMulRecipe = new VPInstruction( + Instruction::FMul, {VecOp, Plan->getVPValue(R->getOperand(1))}); + FMulRecipe->setFastMathFlags(R->getFastMathFlags()); + WidenRecipe->getParent()->insert(FMulRecipe, + WidenRecipe->getIterator()); + VecOp = FMulRecipe; + } + VPReductionRecipe *RedRecipe = + new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, TTI); WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe); Plan->removeVPValueFor(R); Plan->addVPValue(R, RedRecipe); @@ -9744,18 +9498,218 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { } void VPWidenSelectRecipe::execute(VPTransformState &State) { - State.ILV->widenSelectInstruction(*cast<SelectInst>(getUnderlyingInstr()), - this, *this, InvariantCond, State); + auto &I = *cast<SelectInst>(getUnderlyingInstr()); + State.ILV->setDebugLocFromInst(&I); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + auto *InvarCond = + InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *Cond = InvarCond ? 
InvarCond : State.get(getOperand(0), Part); + Value *Op0 = State.get(getOperand(1), Part); + Value *Op1 = State.get(getOperand(2), Part); + Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); + State.set(this, Sel, Part); + State.ILV->addMetadata(Sel, &I); + } } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(*getUnderlyingInstr(), this, *this, State); + auto &I = *cast<Instruction>(getUnderlyingValue()); + auto &Builder = State.Builder; + switch (I.getOpcode()) { + case Instruction::Call: + case Instruction::Br: + case Instruction::PHI: + case Instruction::GetElementPtr: + case Instruction::Select: + llvm_unreachable("This instruction is handled by a different recipe."); + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::FNeg: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen unops and binops. + State.ILV->setDebugLocFromInst(&I); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + SmallVector<Value *, 2> Ops; + for (VPValue *VPOp : operands()) + Ops.push_back(State.get(VPOp, Part)); + + Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); + + if (auto *VecOp = dyn_cast<Instruction>(V)) { + VecOp->copyIRFlags(&I); + + // If the instruction is vectorized and was in a basic block that needed + // predication, we can't propagate poison-generating flags (nuw/nsw, + // exact, etc.). The control flow has been linearized and the + // instruction is no longer guarded by the predicate, which could make + // the flag properties to no longer hold. + if (State.MayGeneratePoisonRecipes.count(this) > 0) + VecOp->dropPoisonGeneratingFlags(); + } + + // Use this vector value for all users of the original instruction. + State.set(this, V, Part); + State.ILV->addMetadata(V, &I); + } + + break; + } + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. + bool FCmp = (I.getOpcode() == Instruction::FCmp); + auto *Cmp = cast<CmpInst>(&I); + State.ILV->setDebugLocFromInst(Cmp); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + Value *C = nullptr; + if (FCmp) { + // Propagate fast math flags. + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(Cmp->getFastMathFlags()); + C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); + } else { + C = Builder.CreateICmp(Cmp->getPredicate(), A, B); + } + State.set(this, C, Part); + State.ILV->addMetadata(C, &I); + } + + break; + } + + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + auto *CI = cast<CastInst>(&I); + State.ILV->setDebugLocFromInst(CI); + + /// Vectorize casts. + Type *DestTy = (State.VF.isScalar()) + ? 
CI->getType() + : VectorType::get(CI->getType(), State.VF); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); + State.set(this, Cast, Part); + State.ILV->addMetadata(Cast, &I); + } + break; + } + default: + // This instruction is not vectorized by simple widening. + LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); + llvm_unreachable("Unhandled instruction!"); + } // end of switch. } void VPWidenGEPRecipe::execute(VPTransformState &State) { - State.ILV->widenGEP(cast<GetElementPtrInst>(getUnderlyingInstr()), this, - *this, State.UF, State.VF, IsPtrLoopInvariant, - IsIndexLoopInvariant, State); + auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); + // Construct a vector GEP by widening the operands of the scalar GEP as + // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP + // results in a vector of pointers when at least one operand of the GEP + // is vector-typed. Thus, to keep the representation compact, we only use + // vector-typed operands for loop-varying values. + + if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { + // If we are vectorizing, but the GEP has only loop-invariant operands, + // the GEP we build (by only using vector-typed operands for + // loop-varying values) would be a scalar pointer. Thus, to ensure we + // produce a vector of pointers, we need to either arbitrarily pick an + // operand to broadcast, or broadcast a clone of the original GEP. + // Here, we broadcast a clone of the original. + // + // TODO: If at some point we decide to scalarize instructions having + // loop-invariant operands, this special case will no longer be + // required. We would add the scalarization decision to + // collectLoopScalars() and teach getVectorValue() to broadcast + // the lane-zero scalar value. + auto *Clone = State.Builder.Insert(GEP->clone()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); + State.set(this, EntryPart, Part); + State.ILV->addMetadata(EntryPart, GEP); + } + } else { + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers. But if we are only unrolling, we want + // to produce a scalar GEP for each unroll part. Thus, the GEP we + // produce with the code below will be scalar (if VF == 1) or vector + // (otherwise). Note that for the unroll-only case, we still maintain + // values in the vector mapping with initVector, as we do for other + // instructions. + for (unsigned Part = 0; Part < State.UF; ++Part) { + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = IsPtrLoopInvariant + ? State.get(getOperand(0), VPIteration(0, 0)) + : State.get(getOperand(0), Part); + + // Collect all the indices for the new GEP. If any index is + // loop-invariant, we won't broadcast it. + SmallVector<Value *, 4> Indices; + for (unsigned I = 1, E = getNumOperands(); I < E; I++) { + VPValue *Operand = getOperand(I); + if (IsIndexLoopInvariant[I - 1]) + Indices.push_back(State.get(Operand, VPIteration(0, 0))); + else + Indices.push_back(State.get(Operand, Part)); + } + + // If the GEP instruction is vectorized and was in a basic block that + // needed predication, we can't propagate the poison-generating 'inbounds' + // flag. 
The control flow has been linearized and the GEP is no longer + // guarded by the predicate, which could make the 'inbounds' properties to + // no longer hold. + bool IsInBounds = + GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. + auto *NewGEP = IsInBounds + ? State.Builder.CreateInBoundsGEP( + GEP->getSourceElementType(), Ptr, Indices) + : State.Builder.CreateGEP(GEP->getSourceElementType(), + Ptr, Indices); + assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + State.set(this, NewGEP, Part); + State.ILV->addMetadata(NewGEP, GEP); + } + } } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { @@ -9867,8 +9821,8 @@ void VPReductionRecipe::execute(VPTransformState &State) { void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, - *State.Instance, IsPredicated, State); + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *State.Instance, + IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from poison. @@ -9891,7 +9845,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { "Can't scalarize a scalable vector"); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, VPIteration(Part, Lane), IsPredicated, State); } @@ -9970,9 +9924,129 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; - State.ILV->vectorizeMemoryInstruction( - &Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(), - StoredValue, getMask(), Consecutive, Reverse); + + // Attempt to issue a wide load. + LoadInst *LI = dyn_cast<LoadInst>(&Ingredient); + StoreInst *SI = dyn_cast<StoreInst>(&Ingredient); + + assert((LI || SI) && "Invalid Load/Store instruction"); + assert((!SI || StoredValue) && "No stored value provided for widened store"); + assert((!LI || !StoredValue) && "Stored value provided for widened load"); + + Type *ScalarDataTy = getLoadStoreType(&Ingredient); + + auto *DataTy = VectorType::get(ScalarDataTy, State.VF); + const Align Alignment = getLoadStoreAlignment(&Ingredient); + bool CreateGatherScatter = !Consecutive; + + auto &Builder = State.Builder; + InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); + bool isMaskRequired = getMask(); + if (isMaskRequired) + for (unsigned Part = 0; Part < State.UF; ++Part) + BlockInMaskParts[Part] = State.get(getMask(), Part); + + const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { + // Calculate the pointer for the specific unroll-part. + GetElementPtrInst *PartPtr = nullptr; + + bool InBounds = false; + if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts())) + InBounds = gep->isInBounds(); + if (Reverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. 
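// Worked instance of the offsets computed just below (illustrative only, not
// part of the patch): with a fixed vectorization factor of 4 (VScale == 1, so
// RunTimeVF == 4) and unroll part 1, the per-part pointer becomes
//   Ptr + (-Part * RunTimeVF) + (1 - RunTimeVF) = Ptr + (-4) + (-3) = Ptr - 7,
// so the wide access touches elements Ptr[-7..-4], and the loaded or stored
// vector is then reversed to restore the scalar loop's descending order. A
// minimal standalone helper (invented name) that reproduces the arithmetic:
static long reversePartOffset(long Part, long RunTimeVF) {
  // First GEP steps back over whole unroll parts; second GEP steps down to
  // the lowest-addressed lane of this part.
  return -Part * RunTimeVF + (1 - RunTimeVF);
}
// reversePartOffset(0, 4) == -3 and reversePartOffset(1, 4) == -7.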
+ // RunTimeVF = VScale * VF.getKnownMinValue() + // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() + Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF); + // NumElt = -Part * RunTimeVF + Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF); + PartPtr = + cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt)); + PartPtr->setIsInBounds(InBounds); + PartPtr = cast<GetElementPtrInst>( + Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane)); + PartPtr->setIsInBounds(InBounds); + if (isMaskRequired) // Reverse of a null all-one mask is a null mask. + BlockInMaskParts[Part] = + Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse"); + } else { + Value *Increment = + createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part); + PartPtr = cast<GetElementPtrInst>( + Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); + PartPtr->setIsInBounds(InBounds); + } + + unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); + return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); + }; + + // Handle Stores: + if (SI) { + State.ILV->setDebugLocFromInst(SI); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Instruction *NewSI = nullptr; + Value *StoredVal = State.get(StoredValue, Part); + if (CreateGatherScatter) { + Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + Value *VectorGep = State.get(getAddr(), Part); + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, + MaskPart); + } else { + if (Reverse) { + // If we store to reverse consecutive memory locations, then we need + // to reverse the order of elements in the stored value. + StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); + // We don't want to update the value in the map as it might be used in + // another expression. So don't call resetVectorValue(StoredVal). + } + auto *VecPtr = + CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + if (isMaskRequired) + NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, + BlockInMaskParts[Part]); + else + NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + } + State.ILV->addMetadata(NewSI, SI); + } + return; + } + + // Handle loads. + assert(LI && "Must have a load instruction"); + State.ILV->setDebugLocFromInst(LI); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *NewLI; + if (CreateGatherScatter) { + Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + Value *VectorGep = State.get(getAddr(), Part); + NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, + nullptr, "wide.masked.gather"); + State.ILV->addMetadata(NewLI, LI); + } else { + auto *VecPtr = + CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); + if (isMaskRequired) + NewLI = Builder.CreateMaskedLoad( + DataTy, VecPtr, Alignment, BlockInMaskParts[Part], + PoisonValue::get(DataTy), "wide.masked.load"); + else + NewLI = + Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); + + // Add metadata to the load, but setVectorValue to the reverse shuffle. 
+ State.ILV->addMetadata(NewLI, LI); + if (Reverse) + NewLI = Builder.CreateVectorReverse(NewLI, "reverse"); + } + + State.set(getVPSingleValue(), NewLI, Part); + } } // Determine how to lower the scalar epilogue, which depends on 1) optimising diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e3ef0b794f68..95061e9053fa 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -283,6 +283,26 @@ static bool isCommutative(Instruction *I) { return false; } +/// Checks if the given value is actually an undefined constant vector. +static bool isUndefVector(const Value *V) { + if (isa<UndefValue>(V)) + return true; + auto *C = dyn_cast<Constant>(V); + if (!C) + return false; + if (!C->containsUndefOrPoisonElement()) + return false; + auto *VecTy = dyn_cast<FixedVectorType>(C->getType()); + if (!VecTy) + return false; + for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) { + if (Constant *Elem = C->getAggregateElement(I)) + if (!isa<UndefValue>(Elem)) + return false; + } + return true; +} + /// Checks if the vector of instructions can be represented as a shuffle, like: /// %x0 = extractelement <4 x i8> %x, i32 0 /// %x3 = extractelement <4 x i8> %x, i32 3 @@ -327,7 +347,11 @@ static bool isCommutative(Instruction *I) { /// TargetTransformInfo::getInstructionThroughput? static Optional<TargetTransformInfo::ShuffleKind> isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) { - auto *EI0 = cast<ExtractElementInst>(VL[0]); + const auto *It = + find_if(VL, [](Value *V) { return isa<ExtractElementInst>(V); }); + if (It == VL.end()) + return None; + auto *EI0 = cast<ExtractElementInst>(*It); if (isa<ScalableVectorType>(EI0->getVectorOperandType())) return None; unsigned Size = @@ -336,33 +360,41 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) { Value *Vec2 = nullptr; enum ShuffleMode { Unknown, Select, Permute }; ShuffleMode CommonShuffleMode = Unknown; + Mask.assign(VL.size(), UndefMaskElem); for (unsigned I = 0, E = VL.size(); I < E; ++I) { + // Undef can be represented as an undef element in a vector. + if (isa<UndefValue>(VL[I])) + continue; auto *EI = cast<ExtractElementInst>(VL[I]); + if (isa<ScalableVectorType>(EI->getVectorOperandType())) + return None; auto *Vec = EI->getVectorOperand(); + // We can extractelement from undef or poison vector. + if (isUndefVector(Vec)) + continue; // All vector operands must have the same number of vector elements. if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size) return None; + if (isa<UndefValue>(EI->getIndexOperand())) + continue; auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand()); if (!Idx) return None; // Undefined behavior if Idx is negative or >= Size. - if (Idx->getValue().uge(Size)) { - Mask.push_back(UndefMaskElem); + if (Idx->getValue().uge(Size)) continue; - } unsigned IntIdx = Idx->getValue().getZExtValue(); - Mask.push_back(IntIdx); - // We can extractelement from undef or poison vector. - if (isa<UndefValue>(Vec)) - continue; + Mask[I] = IntIdx; // For correct shuffling we have to have at most 2 different vector operands // in all extractelement instructions. 
- if (!Vec1 || Vec1 == Vec) + if (!Vec1 || Vec1 == Vec) { Vec1 = Vec; - else if (!Vec2 || Vec2 == Vec) + } else if (!Vec2 || Vec2 == Vec) { Vec2 = Vec; - else + Mask[I] += Size; + } else { return None; + } if (CommonShuffleMode == Permute) continue; // If the extract index is not the same as the operation number, it is a @@ -1680,6 +1712,28 @@ private: return IsSame(Scalars, ReuseShuffleIndices); } + /// \returns true if current entry has same operands as \p TE. + bool hasEqualOperands(const TreeEntry &TE) const { + if (TE.getNumOperands() != getNumOperands()) + return false; + SmallBitVector Used(getNumOperands()); + for (unsigned I = 0, E = getNumOperands(); I < E; ++I) { + unsigned PrevCount = Used.count(); + for (unsigned K = 0; K < E; ++K) { + if (Used.test(K)) + continue; + if (getOperand(K) == TE.getOperand(I)) { + Used.set(K); + break; + } + } + // Check if we actually found the matching operand. + if (PrevCount == Used.count()) + return false; + } + return true; + } + /// \return Final vectorization factor for the node. Defined by the total /// number of vectorized scalars, including those, used several times in the /// entry and counted in the \a ReuseShuffleIndices, if any. @@ -1773,6 +1827,12 @@ private: return Operands[OpIdx]; } + /// \returns the \p OpIdx operand of this TreeEntry. + ArrayRef<Value *> getOperand(unsigned OpIdx) const { + assert(OpIdx < Operands.size() && "Off bounds"); + return Operands[OpIdx]; + } + /// \returns the number of operands. unsigned getNumOperands() const { return Operands.size(); } @@ -2078,7 +2138,7 @@ private: SmallPtrSet<const Value *, 32> EphValues; /// Holds all of the instructions that we gathered. - SetVector<Instruction *> GatherSeq; + SetVector<Instruction *> GatherShuffleSeq; /// A list of blocks that we are going to CSE. SetVector<BasicBlock *> CSEBlocks; @@ -4386,15 +4446,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, bool IsGather) { DenseMap<Value *, int> ExtractVectorsTys; for (auto *V : VL) { + if (isa<UndefValue>(V)) + continue; // If all users of instruction are going to be vectorized and this // instruction itself is not going to be vectorized, consider this // instruction as dead and remove its cost from the final cost of the // vectorized tree. - if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) || - (IsGather && ScalarToTreeEntry.count(V))) + if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals)) continue; auto *EE = cast<ExtractElementInst>(V); - unsigned Idx = *getExtractIndex(EE); + Optional<unsigned> EEIdx = getExtractIndex(EE); + if (!EEIdx) + continue; + unsigned Idx = *EEIdx; if (TTIRef.getNumberOfParts(VecTy) != TTIRef.getNumberOfParts(EE->getVectorOperandType())) { auto It = @@ -4426,6 +4490,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, for (const auto &Data : ExtractVectorsTys) { auto *EEVTy = cast<FixedVectorType>(Data.first->getType()); unsigned NumElts = VecTy->getNumElements(); + if (Data.second % NumElts == 0) + continue; if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) { unsigned Idx = (Data.second / NumElts) * NumElts; unsigned EENumElts = EEVTy->getNumElements(); @@ -4488,10 +4554,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, // broadcast. 
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); } - if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) && - allSameBlock(VL) && - !isa<ScalableVectorType>( - cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) { + if ((E->getOpcode() == Instruction::ExtractElement || + all_of(E->Scalars, + [](Value *V) { + return isa<ExtractElementInst, UndefValue>(V); + })) && + allSameType(VL)) { // Check that gather of extractelements can be represented as just a // shuffle of a single/two vectors the scalars are extracted from. SmallVector<int> Mask; @@ -4738,7 +4806,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0)); })); - if (isa<UndefValue>(FirstInsert->getOperand(0))) { + if (isUndefVector(FirstInsert->getOperand(0))) { Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask); } else { SmallVector<int> InsertMask(NumElts); @@ -5016,7 +5084,30 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E, // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. InstructionCost VecCost = 0; - if (Instruction::isBinaryOp(E->getOpcode())) { + // Try to find the previous shuffle node with the same operands and same + // main/alternate ops. + auto &&TryFindNodeWithEqualOperands = [this, E]() { + for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) { + if (TE.get() == E) + break; + if (TE->isAltShuffle() && + ((TE->getOpcode() == E->getOpcode() && + TE->getAltOpcode() == E->getAltOpcode()) || + (TE->getOpcode() == E->getAltOpcode() && + TE->getAltOpcode() == E->getOpcode())) && + TE->hasEqualOperands(*E)) + return true; + } + return false; + }; + if (TryFindNodeWithEqualOperands()) { + LLVM_DEBUG({ + dbgs() << "SLP: diamond match for alternate node found.\n"; + E->dump(); + }); + // No need to add new vector costs here since we're going to reuse + // same main/alternate vector ops, just do different shuffling. + } else if (Instruction::isBinaryOp(E->getOpcode())) { VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind); @@ -5060,7 +5151,11 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const { [this](Value *V) { return EphValues.contains(V); }) && (allConstant(TE->Scalars) || isSplat(TE->Scalars) || TE->Scalars.size() < Limit || - (TE->getOpcode() == Instruction::ExtractElement && + ((TE->getOpcode() == Instruction::ExtractElement || + all_of(TE->Scalars, + [](Value *V) { + return isa<ExtractElementInst, UndefValue>(V); + })) && isFixedVectorShuffle(TE->Scalars, Mask)) || (TE->State == TreeEntry::NeedToGather && TE->getOpcode() == Instruction::Load && !TE->isAltShuffle())); @@ -5280,6 +5375,42 @@ InstructionCost BoUpSLP::getSpillCost() const { return Cost; } +/// Check if two insertelement instructions are from the same buildvector. +static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU, + InsertElementInst *V) { + // Instructions must be from the same basic blocks. + if (VU->getParent() != V->getParent()) + return false; + // Checks if 2 insertelements are from the same buildvector. + if (VU->getType() != V->getType()) + return false; + // Multiple used inserts are separate nodes. 
+ if (!VU->hasOneUse() && !V->hasOneUse()) + return false; + auto *IE1 = VU; + auto *IE2 = V; + // Go through the vector operand of insertelement instructions trying to find + // either VU as the original vector for IE2 or V as the original vector for + // IE1. + do { + if (IE2 == VU || IE1 == V) + return true; + if (IE1) { + if (IE1 != VU && !IE1->hasOneUse()) + IE1 = nullptr; + else + IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0)); + } + if (IE2) { + if (IE2 != V && !IE2->hasOneUse()) + IE2 = nullptr; + else + IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0)); + } + } while (IE1 || IE2); + return false; +} + InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { InstructionCost Cost = 0; LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size " @@ -5306,7 +5437,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { SmallVector<APInt> DemandedElts; for (ExternalUser &EU : ExternalUses) { // We only add extract cost once for the same scalar. - if (!ExtractCostCalculated.insert(EU.Scalar).second) + if (!isa_and_nonnull<InsertElementInst>(EU.User) && + !ExtractCostCalculated.insert(EU.Scalar).second) continue; // Uses by ephemeral values are free (because the ephemeral value will be @@ -5326,35 +5458,35 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { // If found user is an insertelement, do not calculate extract cost but try // to detect it as a final shuffled/identity match. - if (isa_and_nonnull<InsertElementInst>(EU.User)) { - if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) { - Optional<int> InsertIdx = getInsertIndex(EU.User, 0); + if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) { + if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) { + Optional<int> InsertIdx = getInsertIndex(VU, 0); if (!InsertIdx || *InsertIdx == UndefMaskElem) continue; - Value *VU = EU.User; auto *It = find_if(FirstUsers, [VU](Value *V) { - // Checks if 2 insertelements are from the same buildvector. - if (VU->getType() != V->getType()) - return false; - auto *IE1 = cast<InsertElementInst>(VU); - auto *IE2 = cast<InsertElementInst>(V); - // Go through of insertelement instructions trying to find either VU - // as the original vector for IE2 or V as the original vector for IE1. - do { - if (IE1 == VU || IE2 == V) - return true; - if (IE1) - IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0)); - if (IE2) - IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0)); - } while (IE1 || IE2); - return false; + return areTwoInsertFromSameBuildVector(VU, + cast<InsertElementInst>(V)); }); int VecId = -1; if (It == FirstUsers.end()) { VF.push_back(FTy->getNumElements()); ShuffleMask.emplace_back(VF.back(), UndefMaskElem); - FirstUsers.push_back(EU.User); + // Find the insertvector, vectorized in tree, if any. + Value *Base = VU; + while (isa<InsertElementInst>(Base)) { + // Build the mask for the vectorized insertelement instructions. 
+ if (const TreeEntry *E = getTreeEntry(Base)) { + VU = cast<InsertElementInst>(Base); + do { + int Idx = E->findLaneForValue(Base); + ShuffleMask.back()[Idx] = Idx; + Base = cast<InsertElementInst>(Base)->getOperand(0); + } while (E == getTreeEntry(Base)); + break; + } + Base = cast<InsertElementInst>(Base)->getOperand(0); + } + FirstUsers.push_back(VU); DemandedElts.push_back(APInt::getZero(VF.back())); VecId = FirstUsers.size() - 1; } else { @@ -5363,6 +5495,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { int Idx = *InsertIdx; ShuffleMask[VecId][Idx] = EU.Lane; DemandedElts[VecId].setBit(Idx); + continue; } } @@ -5386,47 +5519,86 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) { InstructionCost SpillCost = getSpillCost(); Cost += SpillCost + ExtractCost; - for (int I = 0, E = FirstUsers.size(); I < E; ++I) { - // For the very first element - simple shuffle of the source vector. - int Limit = ShuffleMask[I].size() * 2; - if (I == 0 && - all_of(ShuffleMask[I], [Limit](int Idx) { return Idx < Limit; }) && - !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) { + if (FirstUsers.size() == 1) { + int Limit = ShuffleMask.front().size() * 2; + if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) && + !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) { InstructionCost C = TTI->getShuffleCost( TTI::SK_PermuteSingleSrc, - cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]); + cast<FixedVectorType>(FirstUsers.front()->getType()), + ShuffleMask.front()); LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C << " for final shuffle of insertelement external users " << *VectorizableTree.front()->Scalars.front() << ".\n" << "SLP: Current total cost = " << Cost << "\n"); Cost += C; - continue; } - // Other elements - permutation of 2 vectors (the initial one and the next - // Ith incoming vector). - unsigned VF = ShuffleMask[I].size(); - for (unsigned Idx = 0; Idx < VF; ++Idx) { - int &Mask = ShuffleMask[I][Idx]; - Mask = Mask == UndefMaskElem ? Idx : VF + Mask; - } - InstructionCost C = TTI->getShuffleCost( - TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()), - ShuffleMask[I]); - LLVM_DEBUG( - dbgs() - << "SLP: Adding cost " << C - << " for final shuffle of vector node and external insertelement users " - << *VectorizableTree.front()->Scalars.front() << ".\n" - << "SLP: Current total cost = " << Cost << "\n"); - Cost += C; InstructionCost InsertCost = TTI->getScalarizationOverhead( - cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I], - /*Insert*/ true, - /*Extract*/ false); + cast<FixedVectorType>(FirstUsers.front()->getType()), + DemandedElts.front(), /*Insert*/ true, /*Extract*/ false); + LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost + << " for insertelements gather.\n" + << "SLP: Current total cost = " << Cost << "\n"); Cost -= InsertCost; + } else if (FirstUsers.size() >= 2) { + unsigned MaxVF = *std::max_element(VF.begin(), VF.end()); + // Combined masks of the first 2 vectors. 
+ SmallVector<int> CombinedMask(MaxVF, UndefMaskElem); + copy(ShuffleMask.front(), CombinedMask.begin()); + APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF); + auto *VecTy = FixedVectorType::get( + cast<VectorType>(FirstUsers.front()->getType())->getElementType(), + MaxVF); + for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) { + if (ShuffleMask[1][I] != UndefMaskElem) { + CombinedMask[I] = ShuffleMask[1][I] + MaxVF; + CombinedDemandedElts.setBit(I); + } + } + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C + << " for final shuffle of vector node and external " + "insertelement users " + << *VectorizableTree.front()->Scalars.front() << ".\n" + << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + InstructionCost InsertCost = TTI->getScalarizationOverhead( + VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false); LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost << " for insertelements gather.\n" << "SLP: Current total cost = " << Cost << "\n"); + Cost -= InsertCost; + for (int I = 2, E = FirstUsers.size(); I < E; ++I) { + // Other elements - permutation of 2 vectors (the initial one and the + // next Ith incoming vector). + unsigned VF = ShuffleMask[I].size(); + for (unsigned Idx = 0; Idx < VF; ++Idx) { + int Mask = ShuffleMask[I][Idx]; + if (Mask != UndefMaskElem) + CombinedMask[Idx] = MaxVF + Mask; + else if (CombinedMask[Idx] != UndefMaskElem) + CombinedMask[Idx] = Idx; + } + for (unsigned Idx = VF; Idx < MaxVF; ++Idx) + if (CombinedMask[Idx] != UndefMaskElem) + CombinedMask[Idx] = Idx; + InstructionCost C = + TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask); + LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C + << " for final shuffle of vector node and external " + "insertelement users " + << *VectorizableTree.front()->Scalars.front() << ".\n" + << "SLP: Current total cost = " << Cost << "\n"); + Cost += C; + InstructionCost InsertCost = TTI->getScalarizationOverhead( + cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I], + /*Insert*/ true, /*Extract*/ false); + LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost + << " for insertelements gather.\n" + << "SLP: Current total cost = " << Cost << "\n"); + Cost -= InsertCost; + } } #ifndef NDEBUG @@ -5728,7 +5900,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) { auto *InsElt = dyn_cast<InsertElementInst>(Vec); if (!InsElt) return Vec; - GatherSeq.insert(InsElt); + GatherShuffleSeq.insert(InsElt); CSEBlocks.insert(InsElt->getParent()); // Add to our 'need-to-extract' list. if (TreeEntry *Entry = getTreeEntry(V)) { @@ -5771,10 +5943,17 @@ class ShuffleInstructionBuilder { const unsigned VF = 0; bool IsFinalized = false; SmallVector<int, 4> Mask; + /// Holds all of the instructions that we gathered. + SetVector<Instruction *> &GatherShuffleSeq; + /// A list of blocks that we are going to CSE. + SetVector<BasicBlock *> &CSEBlocks; public: - ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF) - : Builder(Builder), VF(VF) {} + ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF, + SetVector<Instruction *> &GatherShuffleSeq, + SetVector<BasicBlock *> &CSEBlocks) + : Builder(Builder), VF(VF), GatherShuffleSeq(GatherShuffleSeq), + CSEBlocks(CSEBlocks) {} /// Adds a mask, inverting it before applying. 
void addInversedMask(ArrayRef<unsigned> SubMask) { @@ -5804,7 +5983,12 @@ public: if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask)) return V; - return Builder.CreateShuffleVector(V, Mask, "shuffle"); + Value *Vec = Builder.CreateShuffleVector(V, Mask, "shuffle"); + if (auto *I = dyn_cast<Instruction>(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + return Vec; } ~ShuffleInstructionBuilder() { @@ -5862,6 +6046,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { std::iota(UniformMask.begin(), UniformMask.end(), 0); V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle"); } + if (auto *I = dyn_cast<Instruction>(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } } return V; } @@ -5909,15 +6097,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { VL = UniqueValues; } - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, + CSEBlocks); Value *Vec = gather(VL); if (!ReuseShuffleIndicies.empty()) { ShuffleBuilder.addMask(ReuseShuffleIndicies); Vec = ShuffleBuilder.finalize(Vec); - if (auto *I = dyn_cast<Instruction>(Vec)) { - GatherSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } } return Vec; } @@ -5932,7 +6117,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); unsigned VF = E->getVectorFactor(); - ShuffleInstructionBuilder ShuffleBuilder(Builder, VF); + ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq, + CSEBlocks); if (E->State == TreeEntry::NeedToGather) { if (E->getMainOp()) setInsertPointAfterBundle(E); @@ -5946,16 +6132,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { "Expected shuffle of 1 or 2 entries."); Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue, Entries.back()->VectorizedValue, Mask); + if (auto *I = dyn_cast<Instruction>(Vec)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } } else { Vec = gather(E->Scalars); } if (NeedToShuffleReuses) { ShuffleBuilder.addMask(E->ReuseShuffleIndices); Vec = ShuffleBuilder.finalize(Vec); - if (auto *I = dyn_cast<Instruction>(Vec)) { - GatherSeq.insert(I); - CSEBlocks.insert(I->getParent()); - } } E->VectorizedValue = Vec; return Vec; @@ -6072,11 +6258,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { IsIdentity &= *InsertIdx - Offset == I; Mask[*InsertIdx - Offset] = I; } - if (!IsIdentity || NumElts != NumScalars) + if (!IsIdentity || NumElts != NumScalars) { V = Builder.CreateShuffleVector(V, Mask); + if (auto *I = dyn_cast<Instruction>(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + } if ((!IsIdentity || Offset != 0 || - !isa<UndefValue>(FirstInsert->getOperand(0))) && + !isUndefVector(FirstInsert->getOperand(0))) && NumElts != NumScalars) { SmallVector<int> InsertMask(NumElts); std::iota(InsertMask.begin(), InsertMask.end(), 0); @@ -6088,6 +6279,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { V = Builder.CreateShuffleVector( FirstInsert->getOperand(0), V, InsertMask, cast<Instruction>(E->Scalars.back())->getName()); + if (auto *I = dyn_cast<Instruction>(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } } ++NumVectorInstructions; @@ -6444,6 +6639,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { V1 = Builder.CreateCast( static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy); } + // Add V0 and V1 to later analysis to try to find and remove matching + // instruction, if any. 
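Aside: the loop that follows (and several hunks above and below) repeats the same three-line pattern after building a shuffle or cast: if the result is really an instruction, remember it in GatherShuffleSeq and its parent block in CSEBlocks so optimizeGatherSequence() can later hoist or CSE it. A hypothetical helper capturing that recurring idiom (recordForLateCSE is not part of the patch, just a sketch):

  #include "llvm/ADT/SetVector.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Value.h"
  #include "llvm/Support/Casting.h"

  // Register a freshly built value for the later hoisting/CSE pass, but only
  // if constant folding did not already reduce it to a non-instruction.
  static void recordForLateCSE(llvm::Value *V,
                               llvm::SetVector<llvm::Instruction *> &GatherShuffleSeq,
                               llvm::SetVector<llvm::BasicBlock *> &CSEBlocks) {
    if (auto *I = llvm::dyn_cast<llvm::Instruction>(V)) {
      GatherShuffleSeq.insert(I);
      CSEBlocks.insert(I->getParent());
    }
  }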
+ for (Value *V : {V0, V1}) { + if (auto *I = dyn_cast<Instruction>(V)) { + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } + } // Create shuffle to take alternate operations from the vector. // Also, gather up main and alt scalar ops to propagate IR flags to @@ -6462,8 +6665,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { propagateIRFlags(V1, AltScalars); Value *V = Builder.CreateShuffleVector(V0, V1, Mask); - if (Instruction *I = dyn_cast<Instruction>(V)) + if (auto *I = dyn_cast<Instruction>(V)) { V = propagateMetadata(I, E->Scalars); + GatherShuffleSeq.insert(I); + CSEBlocks.insert(I->getParent()); + } V = ShuffleBuilder.finalize(V); E->VectorizedValue = V; @@ -6657,10 +6863,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { } void BoUpSLP::optimizeGatherSequence() { - LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() + LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. - for (Instruction *I : GatherSeq) { + for (Instruction *I : GatherShuffleSeq) { if (isDeleted(I)) continue; @@ -6677,11 +6883,10 @@ void BoUpSLP::optimizeGatherSequence() { // If the vector or the element that we insert into it are // instructions that are defined in this basic block then we can't // hoist this instruction. - auto *Op0 = dyn_cast<Instruction>(I->getOperand(0)); - auto *Op1 = dyn_cast<Instruction>(I->getOperand(1)); - if (Op0 && L->contains(Op0)) - continue; - if (Op1 && L->contains(Op1)) + if (any_of(I->operands(), [L](Value *V) { + auto *OpI = dyn_cast<Instruction>(V); + return OpI && L->contains(OpI); + })) continue; // We can hoist this instruction. Move it to the pre-header. @@ -6705,7 +6910,50 @@ void BoUpSLP::optimizeGatherSequence() { return A->getDFSNumIn() < B->getDFSNumIn(); }); - // Perform O(N^2) search over the gather sequences and merge identical + // Less defined shuffles can be replaced by the more defined copies. + // Between two shuffles one is less defined if it has the same vector operands + // and its mask indeces are the same as in the first one or undefs. E.g. + // shuffle %0, poison, <0, 0, 0, undef> is less defined than shuffle %0, + // poison, <0, 0, 0, 0>. + auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2, + SmallVectorImpl<int> &NewMask) { + if (I1->getType() != I2->getType()) + return false; + auto *SI1 = dyn_cast<ShuffleVectorInst>(I1); + auto *SI2 = dyn_cast<ShuffleVectorInst>(I2); + if (!SI1 || !SI2) + return I1->isIdenticalTo(I2); + if (SI1->isIdenticalTo(SI2)) + return true; + for (int I = 0, E = SI1->getNumOperands(); I < E; ++I) + if (SI1->getOperand(I) != SI2->getOperand(I)) + return false; + // Check if the second instruction is more defined than the first one. + NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end()); + ArrayRef<int> SM1 = SI1->getShuffleMask(); + // Count trailing undefs in the mask to check the final number of used + // registers. + unsigned LastUndefsCnt = 0; + for (int I = 0, E = NewMask.size(); I < E; ++I) { + if (SM1[I] == UndefMaskElem) + ++LastUndefsCnt; + else + LastUndefsCnt = 0; + if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem && + NewMask[I] != SM1[I]) + return false; + if (NewMask[I] == UndefMaskElem) + NewMask[I] = SM1[I]; + } + // Check if the last undefs actually change the final number of used vector + // registers. 
+ return SM1.size() - LastUndefsCnt > 1 && + TTI->getNumberOfParts(SI1->getType()) == + TTI->getNumberOfParts( + FixedVectorType::get(SI1->getType()->getElementType(), + SM1.size() - LastUndefsCnt)); + }; + // Perform O(N^2) search over the gather/shuffle sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallVector<Instruction *, 16> Visited; @@ -6719,17 +6967,35 @@ void BoUpSLP::optimizeGatherSequence() { if (isDeleted(&In)) continue; if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) && - !isa<ShuffleVectorInst>(&In)) + !isa<ShuffleVectorInst>(&In) && !GatherShuffleSeq.contains(&In)) continue; // Check if we can replace this instruction with any of the // visited instructions. bool Replaced = false; - for (Instruction *v : Visited) { - if (In.isIdenticalTo(v) && - DT->dominates(v->getParent(), In.getParent())) { - In.replaceAllUsesWith(v); + for (Instruction *&V : Visited) { + SmallVector<int> NewMask; + if (IsIdenticalOrLessDefined(&In, V, NewMask) && + DT->dominates(V->getParent(), In.getParent())) { + In.replaceAllUsesWith(V); eraseInstruction(&In); + if (auto *SI = dyn_cast<ShuffleVectorInst>(V)) + if (!NewMask.empty()) + SI->setShuffleMask(NewMask); + Replaced = true; + break; + } + if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) && + GatherShuffleSeq.contains(V) && + IsIdenticalOrLessDefined(V, &In, NewMask) && + DT->dominates(In.getParent(), V->getParent())) { + In.moveAfter(V); + V->replaceAllUsesWith(&In); + eraseInstruction(V); + if (auto *SI = dyn_cast<ShuffleVectorInst>(&In)) + if (!NewMask.empty()) + SI->setShuffleMask(NewMask); + V = &In; Replaced = true; break; } @@ -6741,7 +7007,7 @@ void BoUpSLP::optimizeGatherSequence() { } } CSEBlocks.clear(); - GatherSeq.clear(); + GatherShuffleSeq.clear(); } // Groups the instructions to a bundle (which is then a single scheduling entity) @@ -8791,6 +9057,8 @@ private: assert(VectorizedValue && "Need to have a vectorized tree node"); assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); + assert(RdxKind != RecurKind::FMulAdd && + "A call to the llvm.fmuladd intrinsic is not handled yet"); ++NumVectorInstructions; return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind, @@ -9123,8 +9391,9 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, SmallVector<Value *, 16> BuildVectorOpds; SmallVector<int> Mask; if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) || - (llvm::all_of(BuildVectorOpds, - [](Value *V) { return isa<ExtractElementInst>(V); }) && + (llvm::all_of( + BuildVectorOpds, + [](Value *V) { return isa<ExtractElementInst, UndefValue>(V); }) && isFixedVectorShuffle(BuildVectorOpds, Mask))) return false; @@ -9132,44 +9401,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, return tryToVectorizeList(BuildVectorInsts, R); } -bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R, - bool AtTerminator) { - bool OpsChanged = false; - SmallVector<Instruction *, 4> PostponedCmps; - for (auto *I : reverse(Instructions)) { - if (R.isDeleted(I)) - continue; - if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) - OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); - else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) - OpsChanged |= vectorizeInsertElementInst(LastInsertElem, 
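Aside on the optimizeGatherSequence() change above: the IsIdenticalOrLessDefined lambda lets one shuffle replace another shuffle of the same operands when their masks agree on every lane where both are defined; the survivor's undef lanes are then completed from the mask of the instruction being removed. A minimal sketch of that mask check on plain int vectors, again with -1 standing in for UndefMaskElem (the trailing-undef / register-count refinement from the patch is left out):

  #include <cstddef>
  #include <vector>

  // Returns true if the two masks are compatible: wherever both are defined
  // they must agree. On success, undef lanes of Survivor are filled in from
  // Removed, so the surviving shuffle can serve both uses.
  bool isIdenticalOrLessDefined(const std::vector<int> &Removed,
                                std::vector<int> &Survivor) {
    if (Removed.size() != Survivor.size())
      return false;
    for (std::size_t I = 0; I < Removed.size(); ++I) {
      if (Removed[I] != -1 && Survivor[I] != -1 && Removed[I] != Survivor[I])
        return false;
      if (Survivor[I] == -1)
        Survivor[I] = Removed[I];
    }
    return true;
  }

For example, masks <0, 0, 0, -1> and <0, 0, 0, 0> are compatible and the completed mask is <0, 0, 0, 0>, matching the shuffle %0, poison, <0, 0, 0, undef> example in the comment above.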
BB, R); - else if (isa<CmpInst>(I)) - PostponedCmps.push_back(I); - } - if (AtTerminator) { - // Try to find reductions first. - for (Instruction *I : PostponedCmps) { - if (R.isDeleted(I)) - continue; - for (Value *Op : I->operands()) - OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI); - } - // Try to vectorize operands as vector bundles. - for (Instruction *I : PostponedCmps) { - if (R.isDeleted(I)) - continue; - OpsChanged |= tryToVectorize(I, R); - } - Instructions.clear(); - } else { - // Insert in reverse order since the PostponedCmps vector was filled in - // reverse order. - Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend()); - } - return OpsChanged; -} - template <typename T> static bool tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming, @@ -9242,6 +9473,101 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming, return Changed; } +bool SLPVectorizerPass::vectorizeSimpleInstructions( + SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R, + bool AtTerminator) { + bool OpsChanged = false; + SmallVector<Instruction *, 4> PostponedCmps; + for (auto *I : reverse(Instructions)) { + if (R.isDeleted(I)) + continue; + if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I)) + OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); + else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I)) + OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R); + else if (isa<CmpInst>(I)) + PostponedCmps.push_back(I); + } + if (AtTerminator) { + // Try to find reductions first. + for (Instruction *I : PostponedCmps) { + if (R.isDeleted(I)) + continue; + for (Value *Op : I->operands()) + OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI); + } + // Try to vectorize operands as vector bundles. + for (Instruction *I : PostponedCmps) { + if (R.isDeleted(I)) + continue; + OpsChanged |= tryToVectorize(I, R); + } + // Try to vectorize list of compares. + // Sort by type, compare predicate, etc. + // TODO: Add analysis on the operand opcodes (profitable to vectorize + // instructions with same/alternate opcodes/const values). 
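Aside: the CompareSorter and AreCompatibleCompares lambdas just below group postponed compares by operand type and predicate, and treat two predicates as compatible when they are equal or are operand-swapped forms of one another (x < y is the same comparison as y > x with its operands exchanged). A small illustration of that rule using the existing CmpInst helper:

  #include "llvm/IR/InstrTypes.h"

  // Two scalar compares can share one vectorized compare if their predicates
  // are equal or one is the swapped form of the other (that lane's operands
  // are then swapped to match).
  static bool compatiblePredicates(llvm::CmpInst::Predicate A,
                                   llvm::CmpInst::Predicate B) {
    return A == B || A == llvm::CmpInst::getSwappedPredicate(B);
  }

  // e.g. compatiblePredicates(llvm::CmpInst::ICMP_SLT, llvm::CmpInst::ICMP_SGT)
  // returns true, while ICMP_SLT vs ICMP_EQ does not.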
+ auto &&CompareSorter = [&R](Value *V, Value *V2) { + auto *CI1 = cast<CmpInst>(V); + auto *CI2 = cast<CmpInst>(V2); + if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) + return false; + if (CI1->getOperand(0)->getType()->getTypeID() < + CI2->getOperand(0)->getType()->getTypeID()) + return true; + if (CI1->getOperand(0)->getType()->getTypeID() > + CI2->getOperand(0)->getType()->getTypeID()) + return false; + return CI1->getPredicate() < CI2->getPredicate() || + (CI1->getPredicate() > CI2->getPredicate() && + CI1->getPredicate() < + CmpInst::getSwappedPredicate(CI2->getPredicate())); + }; + + auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) { + if (V1 == V2) + return true; + auto *CI1 = cast<CmpInst>(V1); + auto *CI2 = cast<CmpInst>(V2); + if (R.isDeleted(CI2) || !isValidElementType(CI2->getType())) + return false; + if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType()) + return false; + return CI1->getPredicate() == CI2->getPredicate() || + CI1->getPredicate() == + CmpInst::getSwappedPredicate(CI2->getPredicate()); + }; + auto Limit = [&R](Value *V) { + unsigned EltSize = R.getVectorElementSize(V); + return std::max(2U, R.getMaxVecRegSize() / EltSize); + }; + + SmallVector<Value *> Vals(PostponedCmps.begin(), PostponedCmps.end()); + OpsChanged |= tryToVectorizeSequence<Value>( + Vals, Limit, CompareSorter, AreCompatibleCompares, + [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) { + // Exclude possible reductions from other blocks. + bool ArePossiblyReducedInOtherBlock = + any_of(Candidates, [](Value *V) { + return any_of(V->users(), [V](User *U) { + return isa<SelectInst>(U) && + cast<SelectInst>(U)->getParent() != + cast<Instruction>(V)->getParent(); + }); + }); + if (ArePossiblyReducedInOtherBlock) + return false; + return tryToVectorizeList(Candidates, R, LimitForRegisterSize); + }, + /*LimitForRegisterSize=*/true); + Instructions.clear(); + } else { + // Insert in reverse order since the PostponedCmps vector was filled in + // reverse order. + Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend()); + } + return OpsChanged; +} + bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector<Value *, 4> Incoming; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 638467f94e1c..44b5e1df0839 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -718,6 +718,8 @@ void VPInstruction::generateInstruction(VPTransformState &State, void VPInstruction::execute(VPTransformState &State) { assert(!State.Instance && "VPInstruction executing an Instance"); + IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); + State.Builder.setFastMathFlags(FMF); for (unsigned Part = 0; Part < State.UF; ++Part) generateInstruction(State, Part); } @@ -760,6 +762,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, O << Instruction::getOpcodeName(getOpcode()); } + O << FMF; + for (const VPValue *Operand : operands()) { O << " "; Operand->printAsOperand(O, SlotTracker); @@ -767,6 +771,16 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, } #endif +void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { + // Make sure the VPInstruction is a floating-point operation. 
+ assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || + Opcode == Instruction::FNeg || Opcode == Instruction::FSub || + Opcode == Instruction::FDiv || Opcode == Instruction::FRem || + Opcode == Instruction::FCmp) && + "this op can't take fast-math flags"); + FMF = FMFNew; +} + /// Generate the code inside the body of the vectorized loop. Assumes a single /// LoopVectorBody basic-block was created for this. Introduce additional /// basic-blocks as needed, and fill them all. @@ -1196,8 +1210,10 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, printAsOperand(O, SlotTracker); O << " = "; getChainOp()->printAsOperand(O, SlotTracker); - O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) - << " ("; + O << " +"; + if (isa<FPMathOperator>(getUnderlyingInstr())) + O << getUnderlyingInstr()->getFastMathFlags(); + O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; getVecOp()->printAsOperand(O, SlotTracker); if (getCondOp()) { O << ", "; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 00ee31007cb7..810dd5030f95 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -59,6 +59,7 @@ class Value; class VPBasicBlock; class VPRegionBlock; class VPlan; +class VPReplicateRecipe; class VPlanSlp; /// Returns a calculation for the total number of elements for a given \p VF. @@ -346,6 +347,10 @@ struct VPTransformState { /// Pointer to the VPlan code is generated for. VPlan *Plan; + + /// Holds recipes that may generate a poison value that is used after + /// vectorization, even when their operands are not poison. + SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes; }; /// VPUsers instance used by VPBlockBase to manage CondBit and the block @@ -789,6 +794,7 @@ public: private: typedef unsigned char OpcodeTy; OpcodeTy Opcode; + FastMathFlags FMF; /// Utility method serving execute(): generates a single instance of the /// modeled instruction. @@ -802,13 +808,6 @@ public: : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands), VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {} - VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands) - : VPRecipeBase(VPRecipeBase::VPInstructionSC, {}), - VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) { - for (auto *I : Operands) - addOperand(I->getVPSingleValue()); - } - VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands) : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {} @@ -870,6 +869,9 @@ public: return true; } } + + /// Set the fast-math flags. + void setFastMathFlags(FastMathFlags FMFNew); }; /// VPWidenRecipe is a recipe for producing a copy of vector type its @@ -1511,7 +1513,7 @@ public: /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase { +class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { Instruction &Ingredient; // Whether the loaded-from / stored-to addresses are consecutive. 
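Aside on the VPInstruction fast-math-flag handling above: execute() installs the recipe's flags on the IR builder for the duration of the call and relies on the builder's RAII guard to restore the previous flags afterwards. A minimal self-contained illustration of that pattern (emitFastFAdd and its names are illustrative only, not part of the patch):

  #include "llvm/IR/IRBuilder.h"

  // Apply fast-math flags only for the instructions built inside this scope;
  // the guard restores the builder's previous flags on destruction.
  static llvm::Value *emitFastFAdd(llvm::IRBuilderBase &B, llvm::Value *X,
                                   llvm::Value *Y) {
    llvm::IRBuilderBase::FastMathFlagGuard Guard(B);
    llvm::FastMathFlags FMF;
    FMF.setFast(); // enable the whole fast-math set for this one operation
    B.setFastMathFlags(FMF);
    return B.CreateFAdd(X, Y, "fadd.fast");
  }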
@@ -1533,10 +1535,10 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse) - : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load), + : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), + VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this); setMask(Mask); } @@ -1544,6 +1546,7 @@ public: VPValue *StoredValue, VPValue *Mask, bool Consecutive, bool Reverse) : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}), + VPValue(VPValue::VPVMemoryInstructionSC, &Store, this), Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); setMask(Mask); diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 5c9ff41a2d5d..d357ad7c9e10 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -176,8 +176,8 @@ private: std::vector<std::pair<std::string, std::unique_ptr<MemoryBuffer>>> LoadedSourceFiles; - /// Whitelist from -name-whitelist to be used for filtering. - std::unique_ptr<SpecialCaseList> NameWhitelist; + /// Allowlist from -name-allowlist to be used for filtering. + std::unique_ptr<SpecialCaseList> NameAllowlist; }; } @@ -668,11 +668,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { cl::ZeroOrMore, cl::cat(FilteringCategory)); cl::list<std::string> NameFilterFiles( - "name-whitelist", cl::Optional, + "name-allowlist", cl::Optional, cl::desc("Show code coverage only for functions listed in the given " "file"), cl::ZeroOrMore, cl::cat(FilteringCategory)); + // Allow for accepting previous option name. + cl::list<std::string> NameFilterFilesDeprecated( + "name-whitelist", cl::Optional, cl::Hidden, + cl::desc("Show code coverage only for functions listed in the given " + "file. Deprecated, use -name-allowlist instead"), + cl::ZeroOrMore, cl::cat(FilteringCategory)); + cl::list<std::string> NameRegexFilters( "name-regex", cl::Optional, cl::desc("Show code coverage only for functions that match the given " @@ -809,23 +816,34 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { ViewOpts.DemanglerOpts.swap(DemanglerOpts); } - // Read in -name-whitelist files. - if (!NameFilterFiles.empty()) { + // Read in -name-allowlist files. 
+ if (!NameFilterFiles.empty() || !NameFilterFilesDeprecated.empty()) { std::string SpecialCaseListErr; - NameWhitelist = SpecialCaseList::create( - NameFilterFiles, *vfs::getRealFileSystem(), SpecialCaseListErr); - if (!NameWhitelist) + if (!NameFilterFiles.empty()) + NameAllowlist = SpecialCaseList::create( + NameFilterFiles, *vfs::getRealFileSystem(), SpecialCaseListErr); + if (!NameFilterFilesDeprecated.empty()) + NameAllowlist = SpecialCaseList::create(NameFilterFilesDeprecated, + *vfs::getRealFileSystem(), + SpecialCaseListErr); + + if (!NameAllowlist) error(SpecialCaseListErr); } // Create the function filters - if (!NameFilters.empty() || NameWhitelist || !NameRegexFilters.empty()) { + if (!NameFilters.empty() || NameAllowlist || !NameRegexFilters.empty()) { auto NameFilterer = std::make_unique<CoverageFilters>(); for (const auto &Name : NameFilters) NameFilterer->push_back(std::make_unique<NameCoverageFilter>(Name)); - if (NameWhitelist) - NameFilterer->push_back( - std::make_unique<NameWhitelistCoverageFilter>(*NameWhitelist)); + if (NameAllowlist) { + if (!NameFilterFiles.empty()) + NameFilterer->push_back( + std::make_unique<NameAllowlistCoverageFilter>(*NameAllowlist)); + if (!NameFilterFilesDeprecated.empty()) + NameFilterer->push_back( + std::make_unique<NameWhitelistCoverageFilter>(*NameAllowlist)); + } for (const auto &Regex : NameRegexFilters) NameFilterer->push_back( std::make_unique<NameRegexCoverageFilter>(Regex)); diff --git a/llvm/tools/llvm-cov/CoverageFilters.cpp b/llvm/tools/llvm-cov/CoverageFilters.cpp index fac7518d7da2..b7998647cc57 100644 --- a/llvm/tools/llvm-cov/CoverageFilters.cpp +++ b/llvm/tools/llvm-cov/CoverageFilters.cpp @@ -34,6 +34,13 @@ bool NameRegexCoverageFilter::matchesFilename(StringRef Filename) const { return llvm::Regex(Regex).match(Filename); } +bool NameAllowlistCoverageFilter::matches( + const coverage::CoverageMapping &, + const coverage::FunctionRecord &Function) const { + return Allowlist.inSection("llvmcov", "allowlist_fun", Function.Name); +} + +// TODO: remove this when -name-whitelist option is removed. bool NameWhitelistCoverageFilter::matches( const coverage::CoverageMapping &, const coverage::FunctionRecord &Function) const { diff --git a/llvm/tools/llvm-cov/CoverageFilters.h b/llvm/tools/llvm-cov/CoverageFilters.h index 33fd9929c59a..3040fe74f7cf 100644 --- a/llvm/tools/llvm-cov/CoverageFilters.h +++ b/llvm/tools/llvm-cov/CoverageFilters.h @@ -67,7 +67,19 @@ public: }; /// Matches functions whose name appears in a SpecialCaseList in the -/// whitelist_fun section. +/// allowlist_fun section. +class NameAllowlistCoverageFilter : public CoverageFilter { + const SpecialCaseList &Allowlist; + +public: + NameAllowlistCoverageFilter(const SpecialCaseList &Allowlist) + : Allowlist(Allowlist) {} + + bool matches(const coverage::CoverageMapping &CM, + const coverage::FunctionRecord &Function) const override; +}; + +// TODO: Remove this class when -name-whitelist option is removed. class NameWhitelistCoverageFilter : public CoverageFilter { const SpecialCaseList &Whitelist; diff --git a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp index eb746cd2a865..4bdefcdc1758 100644 --- a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp +++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp @@ -269,15 +269,35 @@ class FunctionDifferenceEngine { } else if (isa<CallInst>(L)) { return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain); } else if (isa<PHINode>(L)) { - // FIXME: implement. 
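Aside on the llvm-cov option rename above: -name-whitelist becomes -name-allowlist, and the old spelling is kept as a hidden option so existing invocations keep working while -help only advertises the new name; note that the filter class chosen still depends on which flag supplied the file, so special-case lists using the old whitelist_fun section keep matching. A minimal sketch of the deprecation pattern itself (the variable names below are illustrative):

  #include <string>
  #include "llvm/Support/CommandLine.h"

  namespace {
  // The documented option.
  llvm::cl::list<std::string> AllowlistFiles(
      "name-allowlist", llvm::cl::Optional,
      llvm::cl::desc("Show coverage only for functions listed in the file"));

  // The deprecated spelling: still parsed, but hidden from -help output.
  llvm::cl::list<std::string> WhitelistFiles(
      "name-whitelist", llvm::cl::Optional, llvm::cl::Hidden,
      llvm::cl::desc("Deprecated, use -name-allowlist instead"));
  } // namespace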
+ const PHINode &LI = cast<PHINode>(*L); + const PHINode &RI = cast<PHINode>(*R); // This is really weird; type uniquing is broken? - if (L->getType() != R->getType()) { - if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) { + if (LI.getType() != RI.getType()) { + if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) { if (Complain) Engine.log("different phi types"); return true; } } + + if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) { + if (Complain) + Engine.log("PHI node # of incoming values differ"); + return true; + } + + for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) { + if (TryUnify) + tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I)); + + if (!equivalentAsOperands(LI.getIncomingValue(I), + RI.getIncomingValue(I))) { + if (Complain) + Engine.log("PHI node incoming values differ"); + return true; + } + } + return false; // Terminators. diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp index b237e014038d..5c08e43b4b09 100644 --- a/llvm/tools/llvm-dwarfdump/Statistics.cpp +++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp @@ -60,6 +60,19 @@ struct SaturatingUINT64 { } }; +/// Utility struct to store the full location of a DIE - its CU and offset. +struct DIELocation { + DWARFUnit *DwUnit; + uint64_t DIEOffset; + DIELocation(DWARFUnit *_DwUnit, uint64_t _DIEOffset) + : DwUnit(_DwUnit), DIEOffset(_DIEOffset) {} +}; +/// This represents DWARF locations of CrossCU referencing DIEs. +using CrossCUReferencingDIELocationTy = llvm::SmallVector<DIELocation>; + +/// This maps function DIE offset to its DWARF CU. +using FunctionDIECUTyMap = llvm::DenseMap<uint64_t, DWARFUnit *>; + /// Holds statistics for one function (or other entity that has a PC range and /// contains variables, such as a compile unit). struct PerFunctionStats { @@ -450,15 +463,18 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix, /// Recursively collect variables from subprogram with DW_AT_inline attribute. static void collectAbstractOriginFnInfo( DWARFDie Die, uint64_t SPOffset, - AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo) { + AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo, + AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo) { DWARFDie Child = Die.getFirstChild(); while (Child) { const dwarf::Tag ChildTag = Child.getTag(); if (ChildTag == dwarf::DW_TAG_formal_parameter || - ChildTag == dwarf::DW_TAG_variable) + ChildTag == dwarf::DW_TAG_variable) { GlobalAbstractOriginFnInfo[SPOffset].push_back(Child.getOffset()); - else if (ChildTag == dwarf::DW_TAG_lexical_block) - collectAbstractOriginFnInfo(Child, SPOffset, GlobalAbstractOriginFnInfo); + LocalAbstractOriginFnInfo[SPOffset].push_back(Child.getOffset()); + } else if (ChildTag == dwarf::DW_TAG_lexical_block) + collectAbstractOriginFnInfo(Child, SPOffset, GlobalAbstractOriginFnInfo, + LocalAbstractOriginFnInfo); Child = Child.getSibling(); } } @@ -468,8 +484,9 @@ static void collectStatsRecursive( DWARFDie Die, std::string FnPrefix, std::string VarPrefix, uint64_t BytesInScope, uint32_t InlineDepth, StringMap<PerFunctionStats> &FnStatMap, GlobalStats &GlobalStats, - LocationStats &LocStats, + LocationStats &LocStats, FunctionDIECUTyMap &AbstractOriginFnCUs, AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo, + AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo, FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed, AbstractOriginVarsTy *AbstractOriginVarsPtr = nullptr) { // Skip NULL nodes. 
@@ -499,11 +516,12 @@ static void collectStatsRecursive( auto OffsetFn = Die.find(dwarf::DW_AT_abstract_origin); if (OffsetFn) { uint64_t OffsetOfInlineFnCopy = (*OffsetFn).getRawUValue(); - if (GlobalAbstractOriginFnInfo.count(OffsetOfInlineFnCopy)) { - AbstractOriginVars = GlobalAbstractOriginFnInfo[OffsetOfInlineFnCopy]; + if (LocalAbstractOriginFnInfo.count(OffsetOfInlineFnCopy)) { + AbstractOriginVars = LocalAbstractOriginFnInfo[OffsetOfInlineFnCopy]; AbstractOriginVarsPtr = &AbstractOriginVars; } else { - // This means that the DW_AT_inline fn copy is out of order, + // This means that the DW_AT_inline fn copy is out of order + // or that the abstract_origin references another CU, // so this abstract origin instance will be processed later. FnsWithAbstractOriginToBeProcessed.push_back(Die.getOffset()); AbstractOriginVarsPtr = nullptr; @@ -543,7 +561,9 @@ static void collectStatsRecursive( // for inlined instancies. if (Die.find(dwarf::DW_AT_inline)) { uint64_t SPOffset = Die.getOffset(); - collectAbstractOriginFnInfo(Die, SPOffset, GlobalAbstractOriginFnInfo); + AbstractOriginFnCUs[SPOffset] = Die.getDwarfUnit(); + collectAbstractOriginFnInfo(Die, SPOffset, GlobalAbstractOriginFnInfo, + LocalAbstractOriginFnInfo); return; } @@ -597,8 +617,9 @@ static void collectStatsRecursive( collectStatsRecursive( Child, FnPrefix, ChildVarPrefix, BytesInScope, InlineDepth, FnStatMap, - GlobalStats, LocStats, GlobalAbstractOriginFnInfo, - FnsWithAbstractOriginToBeProcessed, AbstractOriginVarsPtr); + GlobalStats, LocStats, AbstractOriginFnCUs, GlobalAbstractOriginFnInfo, + LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed, + AbstractOriginVarsPtr); Child = Child.getSibling(); } @@ -733,16 +754,24 @@ static void updateVarsWithAbstractOriginLocCovInfo( /// the DW_TAG_subprogram) with an abstract_origin attribute. static void collectZeroLocCovForVarsWithAbstractOrigin( DWARFUnit *DwUnit, GlobalStats &GlobalStats, LocationStats &LocStats, - AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo, + AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo, FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed) { + // The next variable is used to filter out functions that have been processed, + // leaving FnsWithAbstractOriginToBeProcessed with just CrossCU references. + FunctionsWithAbstractOriginTy ProcessedFns; for (auto FnOffset : FnsWithAbstractOriginToBeProcessed) { DWARFDie FnDieWithAbstractOrigin = DwUnit->getDIEForOffset(FnOffset); auto FnCopy = FnDieWithAbstractOrigin.find(dwarf::DW_AT_abstract_origin); AbstractOriginVarsTy AbstractOriginVars; if (!FnCopy) continue; - - AbstractOriginVars = GlobalAbstractOriginFnInfo[(*FnCopy).getRawUValue()]; + uint64_t FnCopyRawUValue = (*FnCopy).getRawUValue(); + // If there is no entry within LocalAbstractOriginFnInfo for the given + // FnCopyRawUValue, function isn't out-of-order in DWARF. Rather, we have + // CrossCU referencing. 
+ if (!LocalAbstractOriginFnInfo.count(FnCopyRawUValue)) + continue; + AbstractOriginVars = LocalAbstractOriginFnInfo[FnCopyRawUValue]; updateVarsWithAbstractOriginLocCovInfo(FnDieWithAbstractOrigin, AbstractOriginVars); @@ -758,6 +787,46 @@ static void collectZeroLocCovForVarsWithAbstractOrigin( LocStats.LocalVarLocStats[ZeroCoverageBucket]++; } } + ProcessedFns.push_back(FnOffset); + } + for (auto ProcessedFn : ProcessedFns) + llvm::erase_value(FnsWithAbstractOriginToBeProcessed, ProcessedFn); +} + +/// Collect zero location coverage for inlined variables which refer to +/// a DW_AT_inline copy of subprogram that is in a different CU. +static void collectZeroLocCovForVarsWithCrossCUReferencingAbstractOrigin( + LocationStats &LocStats, FunctionDIECUTyMap AbstractOriginFnCUs, + AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo, + CrossCUReferencingDIELocationTy &CrossCUReferencesToBeResolved) { + for (const auto &CrossCUReferenceToBeResolved : + CrossCUReferencesToBeResolved) { + DWARFUnit *DwUnit = CrossCUReferenceToBeResolved.DwUnit; + DWARFDie FnDIEWithCrossCUReferencing = + DwUnit->getDIEForOffset(CrossCUReferenceToBeResolved.DIEOffset); + auto FnCopy = + FnDIEWithCrossCUReferencing.find(dwarf::DW_AT_abstract_origin); + if (!FnCopy) + continue; + uint64_t FnCopyRawUValue = (*FnCopy).getRawUValue(); + AbstractOriginVarsTy AbstractOriginVars = + GlobalAbstractOriginFnInfo[FnCopyRawUValue]; + updateVarsWithAbstractOriginLocCovInfo(FnDIEWithCrossCUReferencing, + AbstractOriginVars); + for (auto Offset : AbstractOriginVars) { + LocStats.NumVarParam++; + LocStats.VarParamLocStats[ZeroCoverageBucket]++; + auto Tag = (AbstractOriginFnCUs[FnCopyRawUValue]) + ->getDIEForOffset(Offset) + .getTag(); + if (Tag == dwarf::DW_TAG_formal_parameter) { + LocStats.NumParam++; + LocStats.ParamLocStats[ZeroCoverageBucket]++; + } else if (Tag == dwarf::DW_TAG_variable) { + LocStats.NumVar++; + LocStats.LocalVarLocStats[ZeroCoverageBucket]++; + } + } } } @@ -778,28 +847,46 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, GlobalStats GlobalStats; LocationStats LocStats; StringMap<PerFunctionStats> Statistics; + // This variable holds variable information for functions with + // abstract_origin globally, across all CUs. + AbstractOriginVarsTyMap GlobalAbstractOriginFnInfo; + // This variable holds information about the CU of a function with + // abstract_origin. + FunctionDIECUTyMap AbstractOriginFnCUs; + CrossCUReferencingDIELocationTy CrossCUReferencesToBeResolved; for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) { if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) { - // These variables are being reset for each CU, since there could be - // a situation where we have two subprogram DIEs with the same offsets - // in two diferent CUs, and we can end up using wrong variables info - // when trying to resolve abstract_origin attribute. - // TODO: Handle LTO cases where the abstract origin of - // the function is in a different CU than the one it's - // referenced from or inlined into. - AbstractOriginVarsTyMap GlobalAbstractOriginFnInfo; + // This variable holds variable information for functions with + // abstract_origin, but just for the current CU. 
+ AbstractOriginVarsTyMap LocalAbstractOriginFnInfo; FunctionsWithAbstractOriginTy FnsWithAbstractOriginToBeProcessed; - collectStatsRecursive(CUDie, "/", "g", 0, 0, Statistics, GlobalStats, - LocStats, GlobalAbstractOriginFnInfo, - FnsWithAbstractOriginToBeProcessed); + collectStatsRecursive( + CUDie, "/", "g", 0, 0, Statistics, GlobalStats, LocStats, + AbstractOriginFnCUs, GlobalAbstractOriginFnInfo, + LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed); + // collectZeroLocCovForVarsWithAbstractOrigin will filter out all + // out-of-order DWARF functions that have been processed within it, + // leaving FnsWithAbstractOriginToBeProcessed with only CrossCU + // references. collectZeroLocCovForVarsWithAbstractOrigin( CUDie.getDwarfUnit(), GlobalStats, LocStats, - GlobalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed); + LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed); + + // Collect all CrossCU references into CrossCUReferencesToBeResolved. + for (auto CrossCUReferencingDIEOffset : + FnsWithAbstractOriginToBeProcessed) + CrossCUReferencesToBeResolved.push_back( + DIELocation(CUDie.getDwarfUnit(), CrossCUReferencingDIEOffset)); } } + /// Resolve CrossCU references. + collectZeroLocCovForVarsWithCrossCUReferencingAbstractOrigin( + LocStats, AbstractOriginFnCUs, GlobalAbstractOriginFnInfo, + CrossCUReferencesToBeResolved); + /// Collect the sizes of debug sections. SectionSizes Sizes; calculateSectionSizes(Obj, Sizes, Filename); diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index fd67cac3cdd2..7208011c9866 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -17,6 +17,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/ProfileData/RawMemProfReader.h" #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" @@ -80,8 +81,8 @@ static void exitWithError(Error E, StringRef Whence = "") { instrprof_error instrError = IPE.get(); StringRef Hint = ""; if (instrError == instrprof_error::unrecognized_format) { - // Hint for common error of forgetting --sample for sample profiles. - Hint = "Perhaps you forgot to use the --sample option?"; + // Hint in case user missed specifying the profile type. 
+ Hint = "Perhaps you forgot to use the --sample or --memory option?"; } exitWithError(IPE.message(), std::string(Whence), std::string(Hint)); }); @@ -95,7 +96,7 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") { } namespace { -enum ProfileKinds { instr, sample }; +enum ProfileKinds { instr, sample, memory }; enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid }; } @@ -2447,6 +2448,17 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts, return 0; } +static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) { + auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename); + if (Error E = ReaderOr.takeError()) + exitWithError(std::move(E), Filename); + + std::unique_ptr<llvm::memprof::RawMemProfReader> Reader( + ReaderOr.get().release()); + Reader->printSummaries(OS); + return 0; +} + static int show_main(int argc, const char *argv[]) { cl::opt<std::string> Filename(cl::Positional, cl::Required, cl::desc("<profdata-file>")); @@ -2487,7 +2499,8 @@ static int show_main(int argc, const char *argv[]) { cl::opt<ProfileKinds> ProfileKind( cl::desc("Profile kind:"), cl::init(instr), cl::values(clEnumVal(instr, "Instrumentation profile (default)"), - clEnumVal(sample, "Sample profile"))); + clEnumVal(sample, "Sample profile"), + clEnumVal(memory, "MemProf memory access profile"))); cl::opt<uint32_t> TopNFunctions( "topn", cl::init(0), cl::desc("Show the list of functions with the largest internal counts")); @@ -2532,11 +2545,12 @@ static int show_main(int argc, const char *argv[]) { ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction, TextFormat, ShowBinaryIds, OS); - else + if (ProfileKind == sample) return showSampleProfile(Filename, ShowCounts, TopNFunctions, ShowAllFunctions, ShowDetailedSummary, ShowFunction, ShowProfileSymbolList, ShowSectionInfoOnly, ShowHotFuncList, OS); + return showMemProfProfile(Filename, OS); } int main(int argc, const char *argv[]) { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 4abea0b1d23d..9dd777dd98e7 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -5333,6 +5333,13 @@ const NoteType FreeBSDNoteTypes[] = { "NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"}, }; +const NoteType NetBSDCoreNoteTypes[] = { + {ELF::NT_NETBSDCORE_PROCINFO, + "NT_NETBSDCORE_PROCINFO (procinfo structure)"}, + {ELF::NT_NETBSDCORE_AUXV, "NT_NETBSDCORE_AUXV (ELF auxiliary vector data)"}, + {ELF::NT_NETBSDCORE_LWPSTATUS, "PT_LWPSTATUS (ptrace_lwpstatus structure)"}, +}; + const NoteType OpenBSDCoreNoteTypes[] = { {ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"}, {ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"}, @@ -5453,6 +5460,12 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(FreeBSDNoteTypes); } } + if (ELFType == ELF::ET_CORE && Name.startswith("NetBSD-CORE")) { + StringRef Result = FindNote(NetBSDCoreNoteTypes); + if (!Result.empty()) + return Result; + return FindNote(CoreNoteTypes); + } if (Name.startswith("OpenBSD") && ELFType == ELF::ET_CORE) { // OpenBSD also places the generic core notes in the OpenBSD namespace. 
StringRef Result = FindNote(OpenBSDCoreNoteTypes); diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp index bf25efc0b0bd..4a69f96a597a 100644 --- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp +++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp @@ -39,7 +39,7 @@ enum ID { #include "Opts.inc" #undef PREFIX -const opt::OptTable::Info InfoTable[] = { +static const opt::OptTable::Info InfoTable[] = { #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES) \ { \ @@ -55,7 +55,7 @@ class TLICheckerOptTable : public opt::OptTable { public: TLICheckerOptTable() : OptTable(InfoTable) {} }; -} // namespace +} // end anonymous namespace // We have three levels of reporting. enum class ReportKind { @@ -66,13 +66,14 @@ enum class ReportKind { }; // Most of the ObjectFile interfaces return an Expected<T>, so make it easy -// to ignore those. -template <typename T> T unwrapIgnoreError(Expected<T> E) { +// to ignore errors. +template <typename T> +static T unwrapIgnoreError(Expected<T> E, T Default = T()) { if (E) return std::move(*E); // Sink the error and return a nothing value. consumeError(E.takeError()); - return T(); + return Default; } static void fail(const Twine &Message) { @@ -99,13 +100,14 @@ static void reportArchiveChildIssue(const object::Archive::Child &C, int Index, } // Return Name, and if Name is mangled, append "aka" and the demangled name. -static std::string PrintableName(StringRef Name) { +static std::string getPrintableName(StringRef Name) { std::string OutputName = "'"; OutputName += Name; OutputName += "'"; - if (Name.startswith("_Z") || Name.startswith("??")) { + std::string DemangledName(demangle(Name.str())); + if (Name != DemangledName) { OutputName += " aka "; - OutputName += demangle(Name.str()); + OutputName += DemangledName; } return OutputName; } @@ -119,7 +121,7 @@ struct TLINameList : std::vector<std::pair<StringRef, bool>> { // Print out what we found. void dump(); }; -TLINameList TLINames; +static TLINameList TLINames; void TLINameList::initialize(StringRef TargetTriple) { Triple T(TargetTriple); @@ -146,7 +148,7 @@ void TLINameList::dump() { // output as a header. So, for example, no need to repeat the triple. for (auto &TLIName : TLINames) { outs() << (TLIName.second ? " " : "not ") - << "available: " << PrintableName(TLIName.first) << '\n'; + << "available: " << getPrintableName(TLIName.first) << '\n'; } } @@ -159,24 +161,27 @@ class SDKNameMap : public StringMap<bool> { public: void populateFromFile(StringRef LibDir, StringRef LibName); }; -SDKNameMap SDKNames; +static SDKNameMap SDKNames; // Given an ObjectFile, extract the global function symbols. void SDKNameMap::populateFromObject(ObjectFile *O) { - // FIXME: Support COFF. + // FIXME: Support other formats. if (!O->isELF()) { - WithColor::warning() << "Only ELF-format files are supported\n"; + WithColor::warning() << O->getFileName() + << ": only ELF-format files are supported\n"; return; } - auto *ELF = cast<const ELFObjectFileBase>(O); + const auto *ELF = cast<ELFObjectFileBase>(O); - for (auto I = ELF->getDynamicSymbolIterators().begin(); - I != ELF->getDynamicSymbolIterators().end(); ++I) { - // We want only global function symbols. 
- SymbolRef::Type Type = unwrapIgnoreError(I->getType()); - uint32_t Flags = unwrapIgnoreError(I->getFlags()); - StringRef Name = unwrapIgnoreError(I->getName()); - if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global)) + for (auto &S : ELF->getDynamicSymbolIterators()) { + // We want only defined global function symbols. + SymbolRef::Type Type = unwrapIgnoreError(S.getType()); + uint32_t Flags = unwrapIgnoreError(S.getFlags()); + section_iterator Section = unwrapIgnoreError(S.getSection(), + /*Default=*/O->section_end()); + StringRef Name = unwrapIgnoreError(S.getName()); + if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global) && + Section != O->section_end()) insert({Name, true}); } } @@ -211,7 +216,7 @@ void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) { SmallString<255> Filepath(LibDir); sys::path::append(Filepath, LibName); if (!sys::fs::exists(Filepath)) { - WithColor::warning() << "Could not find '" << StringRef(Filepath) << "'\n"; + WithColor::warning() << StringRef(Filepath) << ": not found\n"; return; } outs() << "\nLooking for symbols in '" << StringRef(Filepath) << "'\n"; @@ -229,13 +234,12 @@ void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) { else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary)) populateFromObject(O); else { - WithColor::warning() << "Not an Archive or ObjectFile: '" - << StringRef(Filepath) << "'\n"; + WithColor::warning() << StringRef(Filepath) + << ": not an archive or object file\n"; return; } if (Precount == size()) - WithColor::warning() << "No symbols found in '" << StringRef(Filepath) - << "'\n"; + WithColor::warning() << StringRef(Filepath) << ": no symbols found\n"; else outs() << "Found " << size() - Precount << " global function symbols in '" << StringRef(Filepath) << "'\n"; @@ -268,10 +272,8 @@ int main(int argc, char *argv[]) { } std::vector<std::string> LibList = Args.getAllArgValues(OPT_INPUT); - if (LibList.empty()) { - WithColor::error() << "No input files\n"; - exit(EXIT_FAILURE); - } + if (LibList.empty()) + fail("no input files\n"); StringRef LibDir = Args.getLastArgValue(OPT_libdir_EQ); bool SeparateMode = Args.hasArg(OPT_separate); @@ -283,10 +285,8 @@ int main(int argc, char *argv[]) { .Case("discrepancy", ReportKind::Discrepancy) .Case("full", ReportKind::Full) .Default(ReportKind::Error); - if (ReportLevel == ReportKind::Error) { - WithColor::error() << "invalid option for --report: " << A->getValue(); - exit(EXIT_FAILURE); - } + if (ReportLevel == ReportKind::Error) + fail(Twine("invalid option for --report: ", StringRef(A->getValue()))); } for (size_t I = 0; I < LibList.size(); ++I) { @@ -330,7 +330,8 @@ int main(int argc, char *argv[]) { constexpr char YesNo[2][4] = {"no ", "yes"}; constexpr char Indicator[4][3] = {"!!", ">>", "<<", "=="}; outs() << Indicator[Which] << " TLI " << YesNo[TLIHas] << " SDK " - << YesNo[SDKHas] << ": " << PrintableName(TLIName.first) << '\n'; + << YesNo[SDKHas] << ": " << getPrintableName(TLIName.first) + << '\n'; } } diff --git a/llvm/utils/TableGen/AsmWriterInst.cpp b/llvm/utils/TableGen/AsmWriterInst.cpp index cf24f79334ca..887abbac9d3b 100644 --- a/llvm/utils/TableGen/AsmWriterInst.cpp +++ b/llvm/utils/TableGen/AsmWriterInst.cpp @@ -147,8 +147,7 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex, std::string::size_type ModifierStart = VarEnd; while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd])) ++VarEnd; - Modifier = std::string(AsmString.begin()+ModifierStart, - 
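Aside on the llvm-tli-checker change above: the rewritten dynamic-symbol loop additionally requires a defining section, because in the ObjectFile interface an undefined (imported) symbol reports the object's section_end() iterator rather than a real section, and such symbols should not count as being provided by the SDK library. A small sketch of that check in isolation (isDefinedIn is illustrative, not part of the patch):

  #include "llvm/Object/ObjectFile.h"
  #include "llvm/Support/Error.h"

  // A defined symbol has a containing section; undefined symbols report the
  // object's section_end() iterator instead.
  static bool isDefinedIn(const llvm::object::SymbolRef &Sym,
                          const llvm::object::ObjectFile &Obj) {
    llvm::Expected<llvm::object::section_iterator> SecOrErr = Sym.getSection();
    if (!SecOrErr) {
      llvm::consumeError(SecOrErr.takeError());
      return false;
    }
    return *SecOrErr != Obj.section_end();
  }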
AsmString.begin()+VarEnd); + Modifier = AsmString.substr(ModifierStart, VarEnd - ModifierStart); if (Modifier.empty()) PrintFatalError(CGI.TheDef->getLoc(), "Bad operand modifier name in '" + diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 137f99078faf..d3beaf61989e 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -676,12 +676,11 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R, isSpeculatable = false; hasSideEffects = false; - if (DefName.size() <= 4 || - std::string(DefName.begin(), DefName.begin() + 4) != "int_") + if (DefName.size() <= 4 || DefName.substr(0, 4) != "int_") PrintFatalError(DefLoc, "Intrinsic '" + DefName + "' does not start with 'int_'!"); - EnumName = std::string(DefName.begin()+4, DefName.end()); + EnumName = DefName.substr(4); if (R->getValue("GCCBuiltinName")) // Ignore a missing GCCBuiltinName field. GCCBuiltinName = std::string(R->getValueAsString("GCCBuiltinName")); @@ -699,8 +698,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R, Name += (EnumName[i] == '_') ? '.' : EnumName[i]; } else { // Verify it starts with "llvm.". - if (Name.size() <= 5 || - std::string(Name.begin(), Name.begin() + 5) != "llvm.") + if (Name.size() <= 5 || Name.substr(0, 5) != "llvm.") PrintFatalError(DefLoc, "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!"); } @@ -709,8 +707,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R, // "llvm.<targetprefix>.". if (!TargetPrefix.empty()) { if (Name.size() < 6+TargetPrefix.size() || - std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size()) - != (TargetPrefix + ".")) + Name.substr(5, 1 + TargetPrefix.size()) != (TargetPrefix + ".")) PrintFatalError(DefLoc, "Intrinsic '" + DefName + "' does not start with 'llvm." + TargetPrefix + ".'!"); |